Compare commits

...

10 commits

Author SHA1 Message Date
Denis
ae9d88e022
Update from v6.1.80 to v6.1.81 2024-03-06 18:01:09 +01:00
Greg Kroah-Hartman
a394a55037
Linux 6.1.81
Link: https://lore.kernel.org/r/20240304211556.993132804@linuxfoundation.org
Tested-by: SeongJae Park <sj@kernel.org>
Tested-by: Ron Economos <re@w6rz.net>
Tested-by: Salvatore Bonaccorso <carnil@debian.org>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Pavel Machek (CIP) <pavel@denx.de>
Tested-by: Shuah Khan <skhan@linuxfoundation.org>
Tested-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Tested-by: Florian Fainelli <florian.fainelli@broadcom.com>
Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
Tested-by: Yann Sionneau <ysionneau@kalrayinc.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:45 +01:00
Maximilian Heyne
4941656e98
xen/events: close evtchn after mapping cleanup
commit fa765c4b4a upstream.

shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taken. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.

This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
  channel:

  CPU0                        CPU1
  shutdown_pirq {
    xen_evtchn_close(e)
                              __startup_pirq {
                                EVTCHNOP_bind_pirq
                                  -> returns just freed evtchn e
                                set_evtchn_to_irq(e, irq)
                              }
    xen_irq_info_cleanup() {
      set_evtchn_to_irq(e, -1)
    }
  }

  Assume here that event channel e refers to the same event channel
  number.
  After this race the evtchn_to_irq mapping for e is invalid (-1).

- __startup_pirq races with __unbind_from_irq in a similar way. Because
  __startup_pirq doesn't take irq_mapping_update_lock it can grab the
  evtchn that __unbind_from_irq is currently freeing and cleaning up. In
  this case even though the event channel is allocated, its mapping can
  be unset in evtchn_to_irq.

The fix is to first clean up the mappings and then close the event
channel. In this way, when an event channel gets allocated, its
potential previous evtchn_to_irq mappings are guaranteed to be unset
already. This is also the reverse of the allocation order, where the
event channel is allocated first and then the mappings are set up.
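
For illustration only, a minimal sketch of the resulting teardown order in
shutdown_pirq (it mirrors the events_base.c hunk shown further down this
page; the surrounding code is elided):

    static void shutdown_pirq(struct irq_data *data)
    {
            /* ... */
            do_mask(info, EVT_MASK_REASON_EXPLICIT);

            /* Clean up the evtchn_to_irq mapping first, so that once the
             * channel is closed and reallocated elsewhere, no stale cleanup
             * can wipe the new owner's mapping. */
            xen_irq_info_cleanup(info);

            /* Only now hand the event channel back to Xen. */
            xen_evtchn_close(evtchn);
    }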

On a 5.10 kernel prior to commit 3fcdaf3d76 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device, which results in a call to shutdown_pirq.
With many NVMe devices it is therefore likely to hit this race during
boot, because there will be multiple calls to shutdown_pirq and
startup_pirq potentially running in parallel.

  ------------[ cut here ]------------
  blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
  kernel BUG at drivers/xen/events/events_base.c:499!
  invalid opcode: 0000 [#1] SMP PTI
  CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
  Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
  Workqueue: nvme-reset-wq nvme_reset_work
  RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
  Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
  RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
  RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
  RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
  R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
  R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
  FS:  0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   ? show_trace_log_lvl+0x1c1/0x2d9
   ? show_trace_log_lvl+0x1c1/0x2d9
   ? set_affinity_irq+0xdc/0x1c0
   ? __die_body.cold+0x8/0xd
   ? die+0x2b/0x50
   ? do_trap+0x90/0x110
   ? bind_evtchn_to_cpu+0xdf/0xf0
   ? do_error_trap+0x65/0x80
   ? bind_evtchn_to_cpu+0xdf/0xf0
   ? exc_invalid_op+0x4e/0x70
   ? bind_evtchn_to_cpu+0xdf/0xf0
   ? asm_exc_invalid_op+0x12/0x20
   ? bind_evtchn_to_cpu+0xdf/0xf0
   ? bind_evtchn_to_cpu+0xc5/0xf0
   set_affinity_irq+0xdc/0x1c0
   irq_do_set_affinity+0x1d7/0x1f0
   irq_setup_affinity+0xd6/0x1a0
   irq_startup+0x8a/0xf0
   __setup_irq+0x639/0x6d0
   ? nvme_suspend+0x150/0x150
   request_threaded_irq+0x10c/0x180
   ? nvme_suspend+0x150/0x150
   pci_request_irq+0xa8/0xf0
   ? __blk_mq_free_request+0x74/0xa0
   queue_request_irq+0x6f/0x80
   nvme_create_queue+0x1af/0x200
   nvme_create_io_queues+0xbd/0xf0
   nvme_setup_io_queues+0x246/0x320
   ? nvme_irq_check+0x30/0x30
   nvme_reset_work+0x1c8/0x400
   process_one_work+0x1b0/0x350
   worker_thread+0x49/0x310
   ? process_one_work+0x350/0x350
   kthread+0x11b/0x140
   ? __kthread_bind_mask+0x60/0x60
   ret_from_fork+0x22/0x30
  Modules linked in:
  ---[ end trace a11715de1eee1873 ]---

Fixes: d46a78b05c ("xen: implement pirq type event channels")
Cc: stable@vger.kernel.org
Co-debugged-by: Andrew Panyakin <apanyaki@amazon.com>
Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:26 +01:00
Ard Biesheuvel
f8e7127258
x86/efistub: Give up if memory attribute protocol returns an error
commit a7a6a01f88 upstream.

The recently introduced EFI memory attributes protocol should be used
if it exists to ensure that the memory allocation created for the kernel
permits execution. This is needed for compatibility with tightened
requirements related to Windows logo certification for x86 PCs.

Currently, we simply strip the execute protect (XP) attribute from the
entire range, but this might be rejected under some firmware security
policies, and so in a subsequent patch, this will be changed to only
strip XP from the executable region that runs early, and make it
read-only (RO) as well.

In order to catch any issues early, ensure that the memory attribute
protocol works as intended, and give up if it produces spurious errors.

Note that the DXE services based fallback was always based on best
effort, so don't propagate any errors returned by that API.

Fixes: a1b87d54f4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:25 +01:00
Martynas Pumputis
395183a6f9
bpf: Derive source IP addr via bpf_*_fib_lookup()
commit dab4e1f06c upstream.

Extend the bpf_fib_lookup() helper by making it return the source
IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set.

For example, the following snippet can be used to derive the desired
source IP address:

    struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr };

    ret = bpf_skb_fib_lookup(skb, &p, sizeof(p),
            BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH);
    if (ret != BPF_FIB_LKUP_RET_SUCCESS)
        return TC_ACT_SHOT;

    /* p.ipv4_src now contains the source address */

The inability to derive the proper source address may cause malfunctions
in BPF-based dataplanes for hosts containing netdevs with more than one
routable IP address or for multi-homed hosts.

For example, Cilium implements packet masquerading in BPF. If an
egressing netdev to which the Cilium's BPF prog is attached has
multiple IP addresses, then only one [hardcoded] IP address can be used for
masquerading. This breaks connectivity if any other IP address should have
been selected instead, for example when a public and a private address
are attached to the same egress interface.

The change was tested with Cilium [1].

Nikolay Aleksandrov helped to figure out the IPv6 addr selection.

[1]: https://github.com/cilium/cilium/pull/28283

Signed-off-by: Martynas Pumputis <m@lambda.lt>
Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:25 +01:00
Louis DeLosSantos
bcf76a40c4
bpf: Add table ID to bpf_fib_lookup BPF helper
commit 8ad77e72ca upstream.

Add ability to specify routing table ID to the `bpf_fib_lookup` BPF
helper.

A new field `tbid` is added to `struct bpf_fib_lookup`, the parameter
struct of the `bpf_fib_lookup` BPF helper.

When the helper is called with the `BPF_FIB_LOOKUP_DIRECT` and
`BPF_FIB_LOOKUP_TBID` flags, the `tbid` field in `struct bpf_fib_lookup`
will be used as the table ID for the fib lookup.

If the `tbid` does not exist the fib lookup will fail with
`BPF_FIB_LKUP_RET_NOT_FWDED`.

The `tbid` field becomes a union over the vlan related output fields
in `struct bpf_fib_lookup` and will be zeroed immediately after usage.
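
For illustration, a minimal usage sketch in the style of the snippet in the
bpf_*_fib_lookup() source-address commit above (the `ip4` header pointer and
the routing table ID 100 are made up for the example):

    struct bpf_fib_lookup p = {
        .family   = AF_INET,
        .ifindex  = skb->ifindex,
        .ipv4_dst = ip4->daddr,
        .tbid     = 100,        /* container-specific routing table */
    };

    ret = bpf_fib_lookup(skb, &p, sizeof(p),
            BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID);
    if (ret != BPF_FIB_LKUP_RET_SUCCESS)
        return TC_ACT_SHOT;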

This functionality is useful in containerized environments.

For instance, if a CNI wants to dictate the next-hop for traffic leaving
a container it can create a container-specific routing table and perform
a fib lookup against this table in a "host-net-namespace-side" TC program.

This functionality also allows `ip rule` like functionality at the TC
layer, allowing an eBPF program to pick a routing table based on some
aspect of the sk_buff.

As a concrete use case, this feature will be used in Cilium's SRv6 L3VPN
datapath.

When egress traffic leaves a Pod an eBPF program attached by Cilium will
determine which VRF the egress traffic should target, and then perform a
FIB lookup in a specific table representing this VRF's FIB.

Signed-off-by: Louis DeLosSantos <louis.delos.devel@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230505-bpf-add-tbid-fib-lookup-v2-1-0a31c22c748c@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:25 +01:00
Pawan Gupta
b22f062284
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
commit 43fb862de8 upstream.

During VMentry VERW is executed to mitigate MDS. After VERW, any memory
access like register push onto stack may put host data in MDS affected
CPU buffers. A guest can then use MDS to sample host data.

Although the likelihood of secrets surviving in registers at the current
VERW callsite is low, it can't be ruled out. Harden the MDS mitigation
by moving VERW later in the VMentry path.

Note that VERW for the MMIO Stale Data mitigation is unchanged because of
the complexity of per-guest conditional VERW, which is not easy to handle
that late in asm with no GPRs available. If the CPU is also affected by
MDS, VERW is executed unconditionally late in asm, regardless of whether
the guest has MMIO access.

  [ pawan: conflict resolved in backport ]

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/all/20240213-delay-verw-v8-6-a6216d83edb7%40linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:24 +01:00
Pawan Gupta
64adb6fa65
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
From: Sean Christopherson <seanjc@google.com>

commit 706a189dcf upstream.

Use EFLAGS.CF instead of EFLAGS.ZF to track whether to use VMRESUME versus
VMLAUNCH.  Freeing up EFLAGS.ZF will allow doing VERW, which clobbers ZF,
for MDS mitigations as late as possible without needing to duplicate VERW
for both paths.

  [ pawan: resolved merge conflict in __vmx_vcpu_run in backport. ]

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Link: https://lore.kernel.org/all/20240213-delay-verw-v8-5-a6216d83edb7%40linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:24 +01:00
Pawan Gupta
a5ce8634f7
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
commit 6613d82e61 upstream.

The VERW mitigation at exit-to-user is enabled via a static branch
mds_user_clear. This static branch is never toggled after boot, and can
be safely replaced with an ALTERNATIVE() which is convenient to use in
asm.

Switch to ALTERNATIVE() to use the VERW mitigation late in exit-to-user
path. Also remove the now redundant VERW in exc_nmi() and
arch_exit_to_user_mode().

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/all/20240213-delay-verw-v8-4-a6216d83edb7%40linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:24 +01:00
Pawan Gupta
8f9a2e7933
x86/entry_32: Add VERW just before userspace transition
commit a0e2dab44d upstream.

As done for entry_64, add support for executing VERW late in the
exit-to-user path for 32-bit mode.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/all/20240213-delay-verw-v8-3-a6216d83edb7%40linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-03-06 18:00:24 +01:00
19 changed files with 379 additions and 66 deletions

View file

@@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing:
    mds_clear_cpu_buffers()
+Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
+Other than CFLAGS.ZF, this macro doesn't clobber any registers.
 The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
 (idle) transitions.
@@ -138,17 +141,30 @@ Mitigation points
    When transitioning from kernel to user space the CPU buffers are flushed
    on affected CPUs when the mitigation is not disabled on the kernel
-   command line. The migitation is enabled through the static key
-   mds_user_clear.
-   The mitigation is invoked in prepare_exit_to_usermode() which covers
-   all but one of the kernel to user space transitions. The exception
-   is when we return from a Non Maskable Interrupt (NMI), which is
-   handled directly in do_nmi().
-   (The reason that NMI is special is that prepare_exit_to_usermode() can
-   enable IRQs. In NMI context, NMIs are blocked, and we don't want to
-   enable IRQs with NMIs blocked.)
+   command line. The mitigation is enabled through the feature flag
+   X86_FEATURE_CLEAR_CPU_BUF.
+   The mitigation is invoked just before transitioning to userspace after
+   user registers are restored. This is done to minimize the window in
+   which kernel data could be accessed after VERW e.g. via an NMI after
+   VERW.
+   **Corner case not handled**
+   Interrupts returning to kernel don't clear CPUs buffers since the
+   exit-to-user path is expected to do that anyways. But, there could be
+   a case when an NMI is generated in kernel after the exit-to-user path
+   has cleared the buffers. This case is not handled and NMI returning to
+   kernel don't clear CPU buffers because:
+   1. It is rare to get an NMI after VERW, but before returning to userspace.
+   2. For an unprivileged user, there is no known way to make that NMI
+      less rare or target it.
+   3. It would take a large number of these precisely-timed NMIs to mount
+      an actual attack. There's presumably not enough bandwidth.
+   4. The NMI in question occurs after a VERW, i.e. when user state is
+      restored and most interesting data is already scrubbed. Whats left
+      is only the data that NMI touches, and that may or may not be of
+      any interest.
 2. C-State transition

View file

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 6
 PATCHLEVEL = 1
-SUBLEVEL = 80
+SUBLEVEL = 81
 EXTRAVERSION =-valve17-chos2
 NAME = Curry Ramen

View file

@@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
     BUG_IF_WRONG_CR3 no_user_check=1
     popfl
     popl    %eax
+    CLEAR_CPU_BUFFERS
 
     /*
      * Return back to the vDSO, which will pop ecx and edx.
@@ -981,6 +982,7 @@ restore_all_switch_stack:
     /* Restore user state */
     RESTORE_REGS pop=4          # skip orig_eax/error_code
+    CLEAR_CPU_BUFFERS
 
 .Lirq_return:
     /*
      * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi)
     /* Not on SYSENTER stack. */
     call    exc_nmi
+    CLEAR_CPU_BUFFERS
     jmp     .Lnmi_return
 
 .Lnmi_from_sysenter_stack:

View file

@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 static __always_inline void arch_exit_to_user_mode(void)
 {
-    mds_user_clear_cpu_buffers();
     amd_clear_divider();
 }
 
 #define arch_exit_to_user_mode arch_exit_to_user_mode

View file

@@ -381,7 +381,6 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
 DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
 
 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
@@ -415,17 +414,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
     asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
 }
 
-/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
-    if (static_branch_likely(&mds_user_clear))
-        mds_clear_cpu_buffers();
-}
-
 /**
  * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
  *

View file

@@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 /* Control unconditional IBPB in switch_mm() */
 DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
 /* Control MDS CPU buffer clear before idling (halt, mwait) */
 DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
 EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -251,7 +248,7 @@ static void __init mds_select_mitigation(void)
         if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
             mds_mitigation = MDS_MITIGATION_VMWERV;
 
-        static_branch_enable(&mds_user_clear);
+        setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
 
         if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
             (mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -355,7 +352,7 @@ static void __init taa_select_mitigation(void)
      * For guests that can't determine whether the correct microcode is
      * present on host, enable the mitigation for UCODE_NEEDED as well.
      */
-    static_branch_enable(&mds_user_clear);
+    setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
 
     if (taa_nosmt || cpu_mitigations_auto_nosmt())
         cpu_smt_disable(false);
@@ -423,7 +420,7 @@ static void __init mmio_select_mitigation(void)
      */
     if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
                                           boot_cpu_has(X86_FEATURE_RTM)))
-        static_branch_enable(&mds_user_clear);
+        setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
     else
         static_branch_enable(&mmio_stale_data_clear);
@@ -483,12 +480,12 @@ static void __init md_clear_update_mitigation(void)
     if (cpu_mitigations_off())
         return;
 
-    if (!static_key_enabled(&mds_user_clear))
+    if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
         goto out;
 
     /*
-     * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
-     * mitigation, if necessary.
+     * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+     * Stale Data mitigation, if necessary.
      */
     if (mds_mitigation == MDS_MITIGATION_OFF &&
         boot_cpu_has_bug(X86_BUG_MDS)) {

View file

@@ -522,9 +522,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
         write_cr2(this_cpu_read(nmi_cr2));
     if (this_cpu_dec_return(nmi_state))
         goto nmi_restart;
-
-    if (user_mode(regs))
-        mds_user_clear_cpu_buffers();
 }
 
 #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)

View file

@@ -2,7 +2,10 @@
 #ifndef __KVM_X86_VMX_RUN_FLAGS_H
 #define __KVM_X86_VMX_RUN_FLAGS_H
 
-#define VMX_RUN_VMRESUME            (1 << 0)
-#define VMX_RUN_SAVE_SPEC_CTRL      (1 << 1)
+#define VMX_RUN_VMRESUME_SHIFT          0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT    1
+
+#define VMX_RUN_VMRESUME        BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL  BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
 
 #endif /* __KVM_X86_VMX_RUN_FLAGS_H */

View file

@@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
     mov (%_ASM_SP), %_ASM_AX
 
     /* Check if vmlaunch or vmresume is needed */
-    testb $VMX_RUN_VMRESUME, %bl
+    bt    $VMX_RUN_VMRESUME_SHIFT, %bx
 
     /* Load guest registers. Don't clobber flags. */
     mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -128,8 +128,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
     /* Load guest RAX. This kills the @regs pointer! */
     mov VCPU_RAX(%_ASM_AX), %_ASM_AX
 
-    /* Check EFLAGS.ZF from 'testb' above */
-    jz .Lvmlaunch
+    /* Clobbers EFLAGS.ZF */
+    CLEAR_CPU_BUFFERS
+
+    /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+    jnc .Lvmlaunch
 
     /*
      * After a successful VMRESUME/VMLAUNCH, control flow "magically"

View file

@@ -407,7 +407,8 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
 static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 {
-    vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+    vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+                            vmx_fb_clear_ctrl_available;
 
     /*
      * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 {
     guest_state_enter_irqoff();
 
-    /* L1D Flush includes CPU buffer clear to mitigate MDS */
+    /*
+     * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+     * mitigation for MDS is done late in VMentry and is still
+     * executed in spite of L1D Flush. This is because an extra VERW
+     * should not matter much after the big hammer L1D Flush.
+     */
     if (static_branch_unlikely(&vmx_l1d_should_flush))
         vmx_l1d_flush(vcpu);
-    else if (static_branch_unlikely(&mds_user_clear))
-        mds_clear_cpu_buffers();
     else if (static_branch_unlikely(&mmio_stale_data_clear) &&
              kvm_arch_has_assigned_device(vcpu->kvm))
         mds_clear_cpu_buffers();

View file

@@ -212,8 +212,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
     }
 }
 
-void efi_adjust_memory_range_protection(unsigned long start,
-                                        unsigned long size)
+efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+                                                unsigned long size)
 {
     efi_status_t status;
     efi_gcd_memory_space_desc_t desc;
@@ -225,13 +225,17 @@ void efi_adjust_memory_range_protection(unsigned long start,
     rounded_end = roundup(start + size, EFI_PAGE_SIZE);
 
     if (memattr != NULL) {
-        efi_call_proto(memattr, clear_memory_attributes, rounded_start,
-                       rounded_end - rounded_start, EFI_MEMORY_XP);
-        return;
+        status = efi_call_proto(memattr, clear_memory_attributes,
+                                rounded_start,
+                                rounded_end - rounded_start,
+                                EFI_MEMORY_XP);
+        if (status != EFI_SUCCESS)
+            efi_warn("Failed to clear EFI_MEMORY_XP attribute\n");
+        return status;
     }
 
     if (efi_dxe_table == NULL)
-        return;
+        return EFI_SUCCESS;
 
     /*
      * Don't modify memory region attributes, they are
@@ -244,7 +248,7 @@ void efi_adjust_memory_range_protection(unsigned long start,
         status = efi_dxe_call(get_memory_space_descriptor, start, &desc);
 
         if (status != EFI_SUCCESS)
-            return;
+            break;
 
         next = desc.base_address + desc.length;
@@ -269,8 +273,10 @@ void efi_adjust_memory_range_protection(unsigned long start,
                  unprotect_start,
                  unprotect_start + unprotect_size,
                  status);
+            break;
         }
     }
+
+    return EFI_SUCCESS;
 }
 
 static efi_char16_t *efistub_fw_vendor(void)
@@ -800,9 +806,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
     *kernel_entry = addr + entry;
 
-    efi_adjust_memory_range_protection(addr, kernel_total_size);
-
-    return EFI_SUCCESS;
+    return efi_adjust_memory_range_protection(addr, kernel_total_size);
 }
 
 static void __noreturn enter_kernel(unsigned long kernel_addr,

View file

@@ -5,8 +5,8 @@
 extern void trampoline_32bit_src(void *, bool);
 extern const u16 trampoline_ljmp_imm_offset;
 
-void efi_adjust_memory_range_protection(unsigned long start,
-                                        unsigned long size);
+efi_status_t efi_adjust_memory_range_protection(unsigned long start,
+                                                unsigned long size);
 
 #ifdef CONFIG_X86_64
 efi_status_t efi_setup_5level_paging(void);

View file

@@ -937,8 +937,8 @@ static void shutdown_pirq(struct irq_data *data)
         return;
 
     do_mask(info, EVT_MASK_REASON_EXPLICIT);
-    xen_evtchn_close(evtchn);
     xen_irq_info_cleanup(info);
+    xen_evtchn_close(evtchn);
 }
 
 static void enable_pirq(struct irq_data *data)
@@ -982,8 +982,6 @@ static void __unbind_from_irq(unsigned int irq)
         unsigned int cpu = cpu_from_irq(irq);
         struct xenbus_device *dev;
 
-        xen_evtchn_close(evtchn);
-
         switch (type_from_irq(irq)) {
         case IRQT_VIRQ:
             per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
@@ -1001,6 +999,7 @@ static void __unbind_from_irq(unsigned int irq)
         }
 
         xen_irq_info_cleanup(info);
+        xen_evtchn_close(evtchn);
     }
 
     xen_free_irq(irq);

View file

@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
                    sockptr_t optval, unsigned int optlen);
     int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
                    sockptr_t optval, sockptr_t optlen);
+    int (*ipv6_dev_get_saddr)(struct net *net,
+                  const struct net_device *dst_dev,
+                  const struct in6_addr *daddr,
+                  unsigned int prefs,
+                  struct in6_addr *saddr);
 };
 
 extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;

View file

@@ -3109,6 +3109,10 @@ union bpf_attr {
  *      **BPF_FIB_LOOKUP_DIRECT**
  *          Do a direct table lookup vs full lookup using FIB
  *          rules.
+ *      **BPF_FIB_LOOKUP_TBID**
+ *          Used with BPF_FIB_LOOKUP_DIRECT.
+ *          Use the routing table ID present in *params*->tbid
+ *          for the fib lookup.
  *      **BPF_FIB_LOOKUP_OUTPUT**
  *          Perform lookup from an egress perspective (default is
  *          ingress).
@@ -3117,6 +3121,11 @@ union bpf_attr {
  *      and *params*->smac will not be set as output. A common
  *      use case is to call **bpf_redirect_neigh**\ () after
  *      doing **bpf_fib_lookup**\ ().
+ *      **BPF_FIB_LOOKUP_SRC**
+ *          Derive and set source IP addr in *params*->ipv{4,6}_src
+ *          for the nexthop. If the src addr cannot be derived,
+ *          **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ *          case, *params*->dmac and *params*->smac are not set either.
  *
  *      *ctx* is either **struct xdp_md** for XDP programs or
  *      **struct sk_buff** tc cls_act programs.
@@ -6687,6 +6696,8 @@ enum {
     BPF_FIB_LOOKUP_DIRECT     = (1U << 0),
     BPF_FIB_LOOKUP_OUTPUT     = (1U << 1),
     BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+    BPF_FIB_LOOKUP_TBID       = (1U << 3),
+    BPF_FIB_LOOKUP_SRC        = (1U << 4),
 };
 
 enum {
@@ -6699,6 +6710,7 @@ enum {
     BPF_FIB_LKUP_RET_UNSUPP_LWT,    /* fwd requires encapsulation */
     BPF_FIB_LKUP_RET_NO_NEIGH,      /* no neighbor entry for nh */
     BPF_FIB_LKUP_RET_FRAG_NEEDED,   /* fragmentation required to fwd */
+    BPF_FIB_LKUP_RET_NO_SRC_ADDR,   /* failed to derive IP src addr */
 };
 
 struct bpf_fib_lookup {
@@ -6733,6 +6745,9 @@ struct bpf_fib_lookup {
         __u32   rt_metric;
     };
 
+    /* input: source address to consider for lookup
+     * output: source address result from lookup
+     */
     union {
         __be32  ipv4_src;
         __u32   ipv6_src[4];    /* in6_addr; network order */
@@ -6747,9 +6762,19 @@ struct bpf_fib_lookup {
         __u32   ipv6_dst[4];    /* in6_addr; network order */
     };
 
-    /* output */
-    __be16  h_vlan_proto;
-    __be16  h_vlan_TCI;
+    union {
+        struct {
+            /* output */
+            __be16  h_vlan_proto;
+            __be16  h_vlan_TCI;
+        };
+        /* input: when accompanied with the
+         * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+         * specific routing table to use for the fib lookup.
+         */
+        __u32   tbid;
+    };
+
     __u8    smac[6];    /* ETH_ALEN */
     __u8    dmac[6];    /* ETH_ALEN */
 };

View file

@@ -5752,6 +5752,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
     u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
     struct fib_table *tb;
 
+    if (flags & BPF_FIB_LOOKUP_TBID) {
+        tbid = params->tbid;
+        /* zero out for vlan output */
+        params->tbid = 0;
+    }
+
     tb = fib_get_table(net, tbid);
     if (unlikely(!tb))
         return BPF_FIB_LKUP_RET_NOT_FWDED;
@@ -5803,6 +5809,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
     params->rt_metric = res.fi->fib_priority;
     params->ifindex = dev->ifindex;
 
+    if (flags & BPF_FIB_LOOKUP_SRC)
+        params->ipv4_src = fib_result_prefsrc(net, &res);
+
     /* xdp and cls_bpf programs are run in RCU-bh so
      * rcu_read_lock_bh is not needed here
      */
@@ -5885,6 +5894,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
     u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
     struct fib6_table *tb;
 
+    if (flags & BPF_FIB_LOOKUP_TBID) {
+        tbid = params->tbid;
+        /* zero out for vlan output */
+        params->tbid = 0;
+    }
+
     tb = ipv6_stub->fib6_get_table(net, tbid);
     if (unlikely(!tb))
         return BPF_FIB_LKUP_RET_NOT_FWDED;
@@ -5939,6 +5954,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
     params->rt_metric = res.f6i->fib6_metric;
     params->ifindex = dev->ifindex;
 
+    if (flags & BPF_FIB_LOOKUP_SRC) {
+        if (res.f6i->fib6_prefsrc.plen) {
+            *src = res.f6i->fib6_prefsrc.addr;
+        } else {
+            err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
+                                                    &fl6.daddr, 0,
+                                                    src);
+            if (err)
+                return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
+        }
+    }
+
     if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
         goto set_fwd_params;
@@ -5957,7 +5984,8 @@
 #endif
 
 #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
-                 BPF_FIB_LOOKUP_SKIP_NEIGH)
+                 BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
+                 BPF_FIB_LOOKUP_SRC)
 
 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
        struct bpf_fib_lookup *, params, int, plen, u32, flags)

View file

@@ -1077,6 +1077,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
     .udp6_lib_lookup = __udp6_lib_lookup,
     .ipv6_setsockopt = do_ipv6_setsockopt,
     .ipv6_getsockopt = do_ipv6_getsockopt,
+    .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
 };
 
 static int __init inet6_init(void)

View file

@@ -841,3 +841,219 @@ a3eb3a74aa8c94e6c8130b55f3b031f29162868c
ad5b847499287227525a35a0c463091fc7ccd252
3de7eedf00702831851c1046aaa64c575ee2fc90
56f768c4e358581a166924b8229ca200f73bbfdd
b7be6c737a179a76901c872f6b4c1d00552d9a1b
ae5f10ed9539878f1128f3fa129f104ba97ffc86
7d34b1078665e171f4883b8675e52d17ebfc5c64
cf33e6ca12d814e1be2263cb76960d0019d7fb94
b73dd5f9997279715cd450ee8ca599aaff2eabb9
2a3d40b4025fcfe51b04924979f1653993b17669
a76072bc73c77cbdc6c77e5893376939894e6f73
00459ae532d6f1e7c720b5a331f40f72cf158dca
174ac6b53a20cc7f466eead68ccee55ab633e5a1
5dbedec7e5cf668caa0d76e02915eef16d22e97f
e30f82597bf64ad32f3b9718bb12791bf3926f3d
507eeaad4d32174640440f225a30112d8cccd374
0cea0c330a11461d0fbad5347a5d68d499db56fd
49e734926a4b07308d98dc9d3c8f05eb77f1da00
ed9fdc82cafbcf8a46b55d315219bf9464621bca
f8faa536370ec9db460bac96460e16801f62325e
8745f3592ee4a7b49ede16ddd3f12a41ecaa23c9
e89c84422f35ce9fcb0fe9e3f3f60506586a7bae
39c6312009574ca73865354133ca222e7753a71b
976126f2def45f4075f18372bc4e97bb5da3757a
0d04e45c65f0785e558b93d2631d58680f263e10
b3152afc0eb864f7c6ecad134a15b577ef7aec77
329fc4d3f73d865b25f2ee4eafafb040ace37ad5
e5f488993bc1893b84d93e9915155fab66a070d2
afbf1a5cef46427241e76704991cc83c9b1a463b
87632bc9ecff5ded93433bc0fca428019bdd1cfe
65a389ef979b5ca96bc08aa165d6710fe8f1e890
0b27bf4c494d61e5663baa34c3edd7ccebf0ea44
0ac219c4c3ab253f3981f346903458d20bacab32
ab63de24ebea36fe73ac7121738595d704b66d96
a3c8fa54e904b0ddb52a08cc2d8ac239054f61fd
29360fd3288f3978ccde2f8f7eba22282c4a08a3
e85b3c15398f6fa1f3941be8acbef79ae114744d
7985d73961bbb4e726c1be7b9cd26becc7be8325
1b0998fdd85776775d975d0024bca227597e836a
c41548fede3d4b0305be2237ba7dbf657e9ff30b
548ab66730848c8ed105e1d7caf9f4e3f68cdc94
d77ab053fb2f97ff366118d4dbffd8fe48168541
1b4223e807fa17bc53062e922e4e7266450e304f
aa5897232682c27ff731b083b40c879b0eb2c994
17ccd9798fe0beda3db212cfa3ebe373f605cbd6
cad078914b628737fa0946de02169e80fba721cf
45085686b9559bfbe3a4f41d3d695a520668f5e1
926405765f25809602c52e037827d0d5a9f62692
0b056a52b3adfe5fedf20cd64addbe4e1d226c95
30a5e812f78e3d1cced90e1ed750bf027599205f
2dc94c160ef0292d7da7ea2d4c3087c852c97fcc
7b410226d9eff7f64857a75d65e149440bff2b2f
eb7b5777d3c7f5dbbb0736f638068f50006d81b0
e5383662fd02ad9516ae2c27f85cd56296372ba9
29059d0f3bc21f76db5a375a70a449ba86f3d6cc
940963613275a39fd693fb1969c1c6fbc0798a21
fc47ed389a884ee4a5b01f62ecae8137b41f63d7
67ffc334b92a96e65b627b6b3349c25946ff69f6
92b8a3273f3812ba441d2a842d602ff7d33362b3
ddf6ee3df30b694ac0a66b243245e5b89b6162e2
b8afc22a1160121d108d7ea8496f133804d69b93
2b1414d5e94e477edff1d2c79030f1d742625ea0
f2261eb994aa5757c1da046b78e3229a3ece0ad9
a0222b48175709b8a66e5f373d17a10ca5659cc8
7d4121b40149aed0698c7b82384c5c069da91836
40f0f326cfe6847faaa409f4883b94fcdda468ab
08562ca971ff6d4d30ef7eb3fe932f8bf9dcd841
ddc547dd05a46720866c32022300f7376c40119f
cefe18e9ec84f8fe3e198ccebb815cc996eb9797
7f8644b6a86d45c9f8240734b161896a09069fe5
d36b9a1b4e5214abaf864afde5617b021b5cb588
2f91a96b892fab2f2543b4a55740c5bee36b1a6b
058ed71e0f7aa3b6694ca357e23d084e5d3f2470
8310080799b40fd9f2a8b808c657269678c149af
8f626221e5fa89134515d358e7d614609b612a5c
3bfe04c1273d30b866f4c7c238331ed3b08e5824
8cec41a35065dcfcca5a2337f4edd56dadd1425c
4cbbc2f0dbe22498e290997c52f088413d6b9ad5
fd3289ab8ed1f8a2f6e3593adf39bb610fbc17a5
59ed284c7bff4da0f6cafd05ca15de1c0ae1d087
abd32d7f5c0294c1b2454c5a3b13b18446bac627
930e826962d9f01dcd2220176134427358d112f2
c34adc20b91a8e55e048b18d63f4f4ae003ecf8f
f590040ce2b712177306b03c2a63b16f7d48d3c8
444d70889d199b7f74eec45f14768a83c0b04d73
2e443ed55fe3ffb08327b331a9f45e9382413c94
8dafc066c54669384ce01b4bbdfe9708a085afb9
237ecf1afe6c22534fa43abdf2bf0b0f52de0aaa
034e2d70b5c7f578200ad09955aeb2aa65d1164a
300111cd9042d133d1edd0255f50556211125ce9
474d521da890b3e3585335fb80a6044cb2553d99
70af82bb9c897faa25a44e4181f36c60312b71ef
bc9f87a41d185d7678c5742a4f5952df04bf2375
c65c475560851291ad64272d4b85b55e70c4adbb
4974d928d5e3909bd8cfe4b0bca2509636a8ebf2
76109a226a39aea5d621b9b0af04ba23fc9cf7de
249d6ca4ff0022a4b51a8eb9fac6d7bff2c94d1b
396a4120011d8d574eb57793efb0eec5f271a2c8
c9fa51d4c434fa7bdafd0c7a9e19cf9023787fd4
65742f4bb1f919caa564b8a20b15b8cdd6eca2ef
e6e04845c2e8af9fef7d58439e9f62a3ed93f33b
e64148635509bf13eea851986f5a0b150e5bd066
fbccc5eb1652b6c4ff446f34eb5a18869b7f4f3b
53e3f2ee8a0ce7fb33488325a74b678ddf74632a
fb7be5e5ec265a47f6763b6d772873da78bb09d0
84a3c10a0c79ede027b030f61a89b6ab7cf98226
03ad085eb14db2ddc4de5d9474426d258dc53954
a8722cece375838f7067aa929d89a819f7d1ae96
d93fd40c62397326046902a2c5cb75af50882a85
f27d319df055629480b84b9288a502337b6f2a2e
88067197e97af3fcb104dd86030f788ec1b32fdb
2d9b3e1ae1bed1f20621d5cc95e74746a4afbe7d
e7945d93fece3ae43d2ed47d5ef4e254c8a3b712
bad6e66d0701d88a1b7018ca0334b551fb71d74a
3bad8dc0ae8db10540290c69bbb3a3f8e6e5aff4
d8950e8e20e006c8cbc4cc1ff81c35921053a8a2
c577208f81c9ddbc5ab1418bfe810680a671fa84
469b84516cc456fdf003dc99d9a9b0e7eab27c24
beeeb4655db99ed0eb70f6756518fd202fd12e1a
ef12d049fa7b429a0f1842307e921da30ff2e97b
88035744b91a187bcc23253a73ef3b1ccc08a2f9
530a4271b7ba5776b7f5a67015ae63a3ba3d2348
29134968f72da9337dff949bba0bdb0c5134ba0a
2e47116315a08bd5fa451bbeb66cb14ffc3f0de1
801873f1750aa1cc42e290d8a818e340fd7d0987
e840ae3dc277f7f4ae38f600e7f5da7f169b8d7c
0912dce9ed4e8a6442fb39627cd37ca5a25beec5
cac22c9a5e661a000e734af797641375fa181dbc
71c43b714fd688ff5ee6d906e5cc38e6a8f2836f
a8901f331b8b7f95a7315d033a22bc84c8365f35
7bc9533e077e2553264b447189d13f83c47770a0
4f3077c3eae7e68e2c0ba6d1bd3f5afeb61eb269
51a0710218cea5c7d5528b92ba19e964423c7f5a
2cca5f519e3a967f4b5b72e69758521401f021eb
99a20f58913a4093c73817f5b364f0cf050a6d75
640f27fc2e7bd69d511675c0c62a90bd9ed977cb
6083b4c5908e0e6d1b578af04103f64c257ffb82
1523291591de054393ff4d732f18abd222ff5949
364d7745974f20ed940918e3129d10c271638153
e2fa53a04cc722aaaedbd91cd414d170067fb09d
df3dec320b7c14780484e824f3ec9c213e4996e1
463b51e90c576cd63269f8420c0a0b09152092e5
5c4feadb0011983bbc4587bc61056c7b379d9969
bf0ca988e250af95824c121873b2f76fccfc91df
04dd4403ff3721ad0bff925116fada773ed6ae69
831e9e63cc3b90f62d82df854cda8232408526a9
d03a9855cbe6f41b2928c4df2e33e05f32a8e7fa
0a49efb94888b6381d9c43fda17115ffda40a039
bfef0cfab41cb4894bc5cf8b93e76327ac04b9b9
850333a25aab582118d9fa405af00caae32faa62
f82865e2a026b6d491377e64ad18326a413e6421
137d20da8ea0daa9e0a2787acc4b66261e8796df
e62d8c1281662a0cff23df2948162c1fe705d613
519a80ea5a1770f1bb7d0627f4670ca1c1767f80
9fbef7dcd8aa552d5a7e6867eec570e89d7d1631
fae3f8b554fae8631a954e2b205ad84c531ba71b
255ac53d78d562fe27b486360699dcbeb0bfacf8
6ee5c4e269a9136da48df4126c4dde9b899d35cf
c8d8876aae34f2609d3ea815106024645fa85112
5aa0c564c017a008b3d971a6228cfae171695f57
0d4150f5eb20b2f14153474af7ca3a26814850a8
49e8d9f465006ba7197cbcc6d297528b72a2f196
5a1f61516f802d95959944e7529d23dfa6868031
6b12589f610ae5ca924573315b4cf3afd593cb09
1f76cb66ff2257675666172aba42b4e661809a20
c66f9f22e6e555edd575d471c6a309466651a2f5
e017486dadf9ebb890bdd654b67014a9aeaa41c1
8b7be6ef588e0df6036e99f0f637fdac641da396
12e63680a76cad3bf505669b753560582ccadfcb
8973a8f9b72dbafbd1083c220d975a0e7ec871d0
ccbf6efab8d37e3af007e83d7e7797f0ab2f3064
0920deeec6dd2e8d142a688a81744702895d46c6
371e1c1b326b5de0a12204f217709e2f626c7fe7
4481d72a4b63eb190e71e381050aa2959226e13b
f30f07ba5789ef5c68c2352b996d8a98fefca8a2
f28dae54632c5ea45f32ddc6fba494f5efc15007
7b2b8a6c75f0c0175f626d61a74e4f7f75d38df4
eb73733124305ce47d86d74fc3610ea7a4e55260
5c6c2fb3c12f7d7bb7f04259878ac965a8ea2d2d
ce606d5334c2abd772bac18c5ee83f3dd82f2a11
c479755cb80a85bbd7569fa7a7e133a66f792a31
f3ea5ec83d1a827f074b2b660749817e0bf2b23e
56587affe21c5cd806523a89efd8da5b49872a72
e58f2862e9fe500b073d20f94e73abc52fb70634
33d064aecd89846d5cf284ab75eeb9098b5ff49e
f0acafd6f79fa6068b7fc4af7980ac9bbd14f1d1
1f3fd81bff03355c3acc8558c3c4da2f2d4e1d18
34378d7ad273ff859c1ed9ab77bb71e55f652b06
8ff6d88c0443acdd4199aacb69f1dd4a24120e8e
476a48cd37c948b160cc3d5ff5b4d2e711f1ca36
350265a753d8b39e2bb11660f2109c8dd5306b45
5a664585a71c3af82a64aa9b38cadfa02f11c841
77330c123d7c443936585f25b31d3979876ba1d0
fff7614f576f802fb0f4ff169cb251c180ce377e
2dfaeac3f38e4e550d215204eedd97a061fdc118
1b54062576792b41f0acb8d562deea7c4c718c33
86c909d2275b91fb34be07b081c7343a0c2351f2
8f05493706ff8296d26b449db295b1dbb1de31dd
3a396c409a39ce701533f3f55f3db0ab700aaeae
2402392bed4e440e05442fb1de4ef97536ff5a96
c4c795b21dd23d9514ae1c6646c3fb2c78b5be60
2c96f66cd0cca5695ec326398f98b58f545ac087
7eb95e0af5c9c2e6fad50356eaf32d216d0e7bc3
a3d369aeb332bc7a29ba1facb9a3d3d8ba8d2568
17acece41de3dafb63018fecbf54d288366901eb
c6ff5fb6b157cf4101889c1f3e169eb6897e8f50
0e351d1aa2e4c1a7a4cb2a5753b86db89796d3c8
19ec82b3cad1abef2a929262b8c1528f4e0c192d
559035e04e442a0c7fd58d5fe00308b0d99e2318
29d3e02fb448b50ffd5d83156de9680daf16f47a
22444d079b4ccc608b9bac3e591cd88629c73df7
2e3087505ddb8ba2d3d4c81306cca11e868fcdb9
07946d956b55703102d5eb1518888f0d0ac87e14
edfaad334a11d4fba21cbd860ba9a61213f4bd0b
da67116b74e6aa9c531de386e1d99f2e460d1cc4
5fafd8254add75d8337df44ba8536e407ffe8928
2d7ebcb5d878b4311db56eeaf7bdd76dbe9b9a13
8866334e35102d054160a86750b7db9203f721f9
585a344af6bcac222608a158fc2830ff02712af5
61adba85cc40287232a539e607164f273260e0fe

View file

@@ -3109,6 +3109,10 @@ union bpf_attr {
  *      **BPF_FIB_LOOKUP_DIRECT**
  *          Do a direct table lookup vs full lookup using FIB
  *          rules.
+ *      **BPF_FIB_LOOKUP_TBID**
+ *          Used with BPF_FIB_LOOKUP_DIRECT.
+ *          Use the routing table ID present in *params*->tbid
+ *          for the fib lookup.
  *      **BPF_FIB_LOOKUP_OUTPUT**
  *          Perform lookup from an egress perspective (default is
  *          ingress).
@@ -3117,6 +3121,11 @@ union bpf_attr {
  *      and *params*->smac will not be set as output. A common
  *      use case is to call **bpf_redirect_neigh**\ () after
  *      doing **bpf_fib_lookup**\ ().
+ *      **BPF_FIB_LOOKUP_SRC**
+ *          Derive and set source IP addr in *params*->ipv{4,6}_src
+ *          for the nexthop. If the src addr cannot be derived,
+ *          **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ *          case, *params*->dmac and *params*->smac are not set either.
  *
  *      *ctx* is either **struct xdp_md** for XDP programs or
  *      **struct sk_buff** tc cls_act programs.
@@ -6687,6 +6696,8 @@ enum {
     BPF_FIB_LOOKUP_DIRECT     = (1U << 0),
     BPF_FIB_LOOKUP_OUTPUT     = (1U << 1),
     BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+    BPF_FIB_LOOKUP_TBID       = (1U << 3),
+    BPF_FIB_LOOKUP_SRC        = (1U << 4),
 };
 
 enum {
@@ -6699,6 +6710,7 @@ enum {
     BPF_FIB_LKUP_RET_UNSUPP_LWT,    /* fwd requires encapsulation */
     BPF_FIB_LKUP_RET_NO_NEIGH,      /* no neighbor entry for nh */
     BPF_FIB_LKUP_RET_FRAG_NEEDED,   /* fragmentation required to fwd */
+    BPF_FIB_LKUP_RET_NO_SRC_ADDR,   /* failed to derive IP src addr */
 };
 
 struct bpf_fib_lookup {
@@ -6733,6 +6745,9 @@ struct bpf_fib_lookup {
         __u32   rt_metric;
     };
 
+    /* input: source address to consider for lookup
+     * output: source address result from lookup
+     */
     union {
         __be32  ipv4_src;
         __u32   ipv6_src[4];    /* in6_addr; network order */
@@ -6747,9 +6762,19 @@ struct bpf_fib_lookup {
         __u32   ipv6_dst[4];    /* in6_addr; network order */
     };
 
-    /* output */
-    __be16  h_vlan_proto;
-    __be16  h_vlan_TCI;
+    union {
+        struct {
+            /* output */
+            __be16  h_vlan_proto;
+            __be16  h_vlan_TCI;
+        };
+        /* input: when accompanied with the
+         * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+         * specific routing table to use for the fib lookup.
+         */
+        __u32   tbid;
+    };
+
     __u8    smac[6];    /* ETH_ALEN */
     __u8    dmac[6];    /* ETH_ALEN */
 };