Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 38c83a0

Browse files
committed
net, bpf: Fix sk_user_data pointer corruption on 32-bit
The Fixes: commit made use of the lower 3 bits of (void *)sk->sk_user_data for flags, and refactored to simplify adding even more. This change immediately broke 32-bit usage: in BPF's reuseport_array for example, 'struct reuseport_array' has an array 'struct sock __rcu *ptrs[]' whose members must be cleared on socket close via now-broken references from sk->sk_user_data. This leads to subtle memory corruption and lock issues that result in kernel hangs and panics while running BPF selftests: root@qemu-armhf:/usr/libexec/kselftests-bpf# test_progs -a select_reuseport bpf_testmod.ko is already unloaded. Loading bpf_testmod.ko... Successfully loaded bpf_testmod.ko. test_config:PASS:netns_new 0 nsec #356/1 select_reuseport/reuseport_sockarray IPv4/TCP LOOPBACK test_err_inner_map:OK [...] ------------[ cut here ]------------ WARNING: CPU: 0 PID: 87 at kernel/locking/lockdep.c:238 __lock_acquire+0xac0/0xd1c DEBUG_LOCKS_WARN_ON(1) Modules linked in: bpf_testmod(OE) bpf_preload CPU: 0 UID: 0 PID: 87 Comm: test_progs Tainted: G OE 6.17.0-rc1-00233-ge37b36224f81-dirty #114 NONE Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Generic DT based system Call trace: dump_backtrace from show_stack+0x20/0x24 r7:c01e2ebc r6:00000080 r5:60010093 r4:c14d3d80 show_stack from dump_stack_lvl+0x90/0xc0 dump_stack_lvl from dump_stack+0x18/0x1c r7:c01e2ebc r6:00000009 r5:000000ee r4:c14c5bc4 dump_stack from __warn+0x8c/0x1b4 __warn from warn_slowpath_fmt+0x130/0x1a4 r8:c01e2ebc r7:c14bd144 r6:c14c5bc4 r5:c3cad400 r4:c1cf8a04 warn_slowpath_fmt from __lock_acquire+0xac0/0xd1c r8:c2896b50 r7:00000000 r6:c58863b8 r5:c3cad400 r4:c3cadcc0 __lock_acquire from lock_acquire.part.0+0xbc/0x240 r10:00000000 r9:1c0ed000 r8:00000000 r7:60010013 r6:c1b902f0 r5:c1b902f0 r4:df865cd0 lock_acquire.part.0 from lock_acquire+0x90/0x168 r10:c5886100 r9:c46a6c04 r8:00000000 r7:00000000 r6:00000000 r5:00000000 r4:c58863b8 lock_acquire from _raw_write_lock_bh+0x54/0x90 r9:c46a6c04 
r8:00000000 r7:00000055 r6:c58863b8 r5:c58863a8 r4:c0394774 _raw_write_lock_bh from bpf_fd_reuseport_array_update_elem+0x16c/0x26c r6:c59a4000 r5:c5191400 r4:c58863a8 bpf_fd_reuseport_array_update_elem from bpf_map_update_value+0x454/0x5dc r10:c329a901 r9:c329a900 r8:c1cf72f0 r7:c3cad400 r6:c595dc00 r5:00000000 r4:00000000 bpf_map_update_value from map_update_elem+0x210/0x430 r10:c329a901 r9:00000004 r8:c595df40 r7:df865ec0 r6:c329a900 r5:c46a6c00 r4:c46a6cf8 map_update_elem from __sys_bpf+0x594/0xc94 r10:00000000 r9:befb18b0 r8:00000051 r7:00000000 r6:00000002 r5:df865eb0 r4:00000020 __sys_bpf from sys_bpf+0x34/0x3c r10:00000182 r9:c3cad400 r8:c0100234 r7:00000182 r6:00000002 r5:befb18b0 r4:00000020 sys_bpf from ret_fast_syscall+0x0/0x1c Exception stack(0xdf865fa8 to 0xdf865ff0) 5fa0: 00000020 befb18b0 00000002 befb18b0 00000020 00000000 5fc0: 00000020 befb18b0 00000002 00000182 00839395 b6fa3ce0 00000000 012ac774 5fe0: befb1880 befb1870 00863133 b6ec3312 irq event stamp: 260676 hardirqs last enabled at (260676): [<c0149fac>] __local_bh_enable_ip+0xc4/0x1b0 hardirqs last disabled at (260675): [<c014a024>] __local_bh_enable_ip+0x13c/0x1b0 softirqs last enabled at (260668): [<c0a1c31c>] release_sock+0x94/0x98 softirqs last disabled at (260674): [<c03946f4>] bpf_fd_reuseport_array_update_elem+0xec/0x26c ---[ end trace 0000000000000000 ]--- Reviewing kernel usage of sk->sk_user_data and the current flag bits: #define SK_USER_DATA_NOCOPY 1UL #define SK_USER_DATA_BPF 2UL #define SK_USER_DATA_PSOCK 4UL reveals that SK_USER_DATA_PSOCK and SK_USER_DATA_BPF both imply SK_USER_DATA_NOCOPY, and suggests we can instead use an equivalent 2-bit enum like: enum sk_user_data { SK_USER_DATA_NONE = 0, SK_USER_DATA_NOCOPY = 1, SK_USER_DATA_BPF = 2, SK_USER_DATA_PSOCK = 3, }; Implement this to fix the pointer corruption, and update related call signatures and comments to clarify the change from multiple flag bits to an enum value, with a note highlighting the 2-bit limitation. 
Fixes: 2a01337 ("net: fix refcount bug in sk_psock_get (2)") Signed-off-by: Tony Ambardar <[email protected]>
1 parent e0a5c9b commit 38c83a0

File tree

5 files changed

+48
-40
lines changed

5 files changed

+48
-40
lines changed

include/linux/skmsg.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,8 +291,8 @@ static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start)
291291

292292
static inline struct sk_psock *sk_psock(const struct sock *sk)
293293
{
294-
return __rcu_dereference_sk_user_data_with_flags(sk,
295-
SK_USER_DATA_PSOCK);
294+
return __rcu_dereference_sk_user_data_with_flag(sk,
295+
SK_USER_DATA_PSOCK);
296296
}
297297

298298
static inline void sk_psock_set_state(struct sk_psock *psock,

include/net/sock.h

Lines changed: 42 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -584,97 +584,108 @@ enum sk_pacing {
584584
SK_PACING_FQ = 2,
585585
};
586586

587-
/* flag bits in sk_user_data
587+
/* Flag values encoded in LSBs of sk->sk_user_data:
588+
*
589+
* - SK_USER_DATA_NONE: Pointer with no special meaning.
588590
*
589591
* - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might
590592
* not be suitable for copying when cloning the socket. For instance,
591-
* it can point to a reference counted object. sk_user_data bottom
592-
* bit is set if pointer must not be copied.
593+
* it can point to a reference counted object.
593594
*
594595
* - SK_USER_DATA_BPF: Mark whether sk_user_data field is
595-
* managed/owned by a BPF reuseport array. This bit should be set
596-
* when sk_user_data's sk is added to the bpf's reuseport_array.
596+
* managed/owned by a BPF reuseport array. This also implies
597+
* SK_USER_DATA_NOCOPY, and should be set when sk_user_data's sk
598+
* is added to the bpf's reuseport_array.
597599
*
598600
* - SK_USER_DATA_PSOCK: Mark whether pointer stored in
599-
* sk_user_data points to psock type. This bit should be set
600-
* when sk_user_data is assigned to a psock object.
601+
* sk_user_data points to psock type. This also implies
602+
* SK_USER_DATA_NOCOPY, and should be set when sk_user_data is
603+
* assigned to a psock object.
604+
*
605+
* NOTE: Further values *must not* be added to the enum below, as they
606+
* already span the lower 2 bits of the sk_user_data field, and more entries
607+
* would first break 32-bit and then 64-bit pointers.
601608
*/
602-
#define SK_USER_DATA_NOCOPY 1UL
603-
#define SK_USER_DATA_BPF 2UL
604-
#define SK_USER_DATA_PSOCK 4UL
605-
#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\
606-
SK_USER_DATA_PSOCK)
609+
610+
enum sk_user_data {
611+
SK_USER_DATA_NONE = 0,
612+
SK_USER_DATA_NOCOPY = 1,
613+
SK_USER_DATA_BPF = 2,
614+
SK_USER_DATA_PSOCK = 3,
615+
};
616+
617+
#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_PSOCK)
607618

608619
/**
609620
* sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied
610621
* @sk: socket
611622
*/
612623
static inline bool sk_user_data_is_nocopy(const struct sock *sk)
613624
{
614-
return ((uintptr_t)sk->sk_user_data & SK_USER_DATA_NOCOPY);
625+
return ((uintptr_t)sk->sk_user_data & ~SK_USER_DATA_PTRMASK);
615626
}
616627

617628
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
618629

619630
/**
620-
* __locked_read_sk_user_data_with_flags - return the pointer
621-
* only if argument flags all has been set in sk_user_data. Otherwise
631+
* __locked_read_sk_user_data_with_flag - return the pointer only
632+
* if argument flag matches that set in sk_user_data. Otherwise
622633
* return NULL
623634
*
624635
* @sk: socket
625-
* @flags: flag bits
636+
* @flag: enum value
626637
*
627638
* The caller must be holding sk->sk_callback_lock.
628639
*/
629640
static inline void *
630-
__locked_read_sk_user_data_with_flags(const struct sock *sk,
631-
uintptr_t flags)
641+
__locked_read_sk_user_data_with_flag(const struct sock *sk,
642+
enum sk_user_data flag)
632643
{
633644
uintptr_t sk_user_data =
634645
(uintptr_t)rcu_dereference_check(__sk_user_data(sk),
635646
lockdep_is_held(&sk->sk_callback_lock));
636647

637-
WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
648+
WARN_ON_ONCE(flag & SK_USER_DATA_PTRMASK);
638649

639-
if ((sk_user_data & flags) == flags)
650+
if ((sk_user_data & ~SK_USER_DATA_PTRMASK) == flag)
640651
return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
641652
return NULL;
642653
}
643654

644655
/**
645-
* __rcu_dereference_sk_user_data_with_flags - return the pointer
646-
* only if argument flags all has been set in sk_user_data. Otherwise
656+
* __rcu_dereference_sk_user_data_with_flag - return the pointer only
657+
* if argument flag matches that set in sk_user_data. Otherwise
647658
* return NULL
648659
*
649660
* @sk: socket
650-
* @flags: flag bits
661+
* @flag: enum value
651662
*/
652663
static inline void *
653-
__rcu_dereference_sk_user_data_with_flags(const struct sock *sk,
654-
uintptr_t flags)
664+
__rcu_dereference_sk_user_data_with_flag(const struct sock *sk,
665+
enum sk_user_data flag)
655666
{
656667
uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk));
657668

658-
WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK);
669+
WARN_ON_ONCE(flag & SK_USER_DATA_PTRMASK);
659670

660-
if ((sk_user_data & flags) == flags)
671+
if ((sk_user_data & ~SK_USER_DATA_PTRMASK) == flag)
661672
return (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
662673
return NULL;
663674
}
664675

665676
#define rcu_dereference_sk_user_data(sk) \
666-
__rcu_dereference_sk_user_data_with_flags(sk, 0)
667-
#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \
677+
__rcu_dereference_sk_user_data_with_flag(sk, 0)
678+
#define __rcu_assign_sk_user_data_with_flag(sk, ptr, flag) \
668679
({ \
669680
uintptr_t __tmp1 = (uintptr_t)(ptr), \
670-
__tmp2 = (uintptr_t)(flags); \
681+
__tmp2 = (uintptr_t)(flag); \
671682
WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \
672683
WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \
673684
rcu_assign_pointer(__sk_user_data((sk)), \
674685
__tmp1 | __tmp2); \
675686
})
676687
#define rcu_assign_sk_user_data(sk, ptr) \
677-
__rcu_assign_sk_user_data_with_flags(sk, ptr, 0)
688+
__rcu_assign_sk_user_data_with_flag(sk, ptr, 0)
678689

679690
static inline
680691
struct net *sock_net(const struct sock *sk)

kernel/bpf/reuseport_array.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ void bpf_sk_reuseport_detach(struct sock *sk)
2424
struct sock __rcu **socks;
2525

2626
write_lock_bh(&sk->sk_callback_lock);
27-
socks = __locked_read_sk_user_data_with_flags(sk, SK_USER_DATA_BPF);
27+
socks = __locked_read_sk_user_data_with_flag(sk, SK_USER_DATA_BPF);
2828
if (socks) {
2929
WRITE_ONCE(sk->sk_user_data, NULL);
3030
/*
@@ -290,8 +290,7 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
290290
if (err)
291291
goto put_file_unlock;
292292

293-
sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_NOCOPY |
294-
SK_USER_DATA_BPF;
293+
sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_BPF;
295294
WRITE_ONCE(nsk->sk_user_data, (void *)sk_user_data);
296295
rcu_assign_pointer(array->ptrs[index], nsk);
297296
free_osk = osk;

net/core/skmsg.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -769,9 +769,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
769769
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
770770
refcount_set(&psock->refcnt, 1);
771771

772-
__rcu_assign_sk_user_data_with_flags(sk, psock,
773-
SK_USER_DATA_NOCOPY |
774-
SK_USER_DATA_PSOCK);
772+
__rcu_assign_sk_user_data_with_flag(sk, psock, SK_USER_DATA_PSOCK);
775773
sock_hold(sk);
776774

777775
out:

net/smc/smc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ static inline void smc_init_saved_callbacks(struct smc_sock *smc)
343343
static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
344344
{
345345
return (struct smc_sock *)
346-
((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
346+
((uintptr_t)clcsk->sk_user_data & SK_USER_DATA_PTRMASK);
347347
}
348348

349349
/* save target_cb in saved_cb, and replace target_cb with new_cb */

0 commit comments

Comments
 (0)