nak: Add encodings for cache eviction priorities

We were previously setting "evict first" everywhere, which could
possibly be a perf issue on machines that are also running shaders
compiled with codegen, which sets "evict normal" on everything.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26201>
This commit is contained in:
M Henning 2023-11-13 22:00:28 -05:00 committed by Faith Ekstrand
parent df9a95a813
commit adc3fd4c99
4 changed files with 86 additions and 8 deletions

View file

@ -1176,6 +1176,18 @@ impl SM75Instr {
}
}
/// Encodes the cache eviction priority `pri` into the instruction field
/// at `84..86`.
///
/// Encoding used: `First` = 0, `Normal` = 1, `Last` = 2, `Unchanged` = 3.
fn set_eviction_priority(&mut self, pri: &MemEvictionPriority) {
    // Translate the priority to its numeric encoding first, then store it.
    let encoded: u8 = match *pri {
        MemEvictionPriority::First => 0,
        MemEvictionPriority::Normal => 1,
        MemEvictionPriority::Last => 2,
        MemEvictionPriority::Unchanged => 3,
    };
    self.set_field(84..86, encoded);
}
fn encode_suld(&mut self, op: &OpSuLd) {
self.set_opcode(0x998);
@ -1186,6 +1198,7 @@ impl SM75Instr {
self.set_image_dim(61..64, op.image_dim);
self.set_mem_order(&op.mem_order);
self.set_eviction_priority(&op.mem_eviction_priority);
assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf);
self.set_field(72..76, op.mask);
@ -1200,6 +1213,7 @@ impl SM75Instr {
self.set_image_dim(61..64, op.image_dim);
self.set_mem_order(&op.mem_order);
self.set_eviction_priority(&op.mem_eviction_priority);
assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf);
self.set_field(72..76, op.mask);
@ -1220,6 +1234,7 @@ impl SM75Instr {
self.set_image_dim(61..64, op.image_dim);
self.set_mem_order(&op.mem_order);
self.set_eviction_priority(&op.mem_eviction_priority);
self.set_bit(72, false); /* .BA */
self.set_atom_type(73..76, op.atom_type);
@ -1252,6 +1267,7 @@ impl SM75Instr {
);
self.set_mem_type(73..76, access.mem_type);
self.set_mem_order(&access.order);
self.set_eviction_priority(&access.eviction_priority);
}
fn encode_ldg(&mut self, op: &OpLd) {
@ -1275,6 +1291,7 @@ impl SM75Instr {
assert!(op.access.addr_type == MemAddrType::A32);
self.set_mem_type(73..76, op.access.mem_type);
assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
}
fn encode_lds(&mut self, op: &OpLd) {
@ -1287,6 +1304,7 @@ impl SM75Instr {
assert!(op.access.addr_type == MemAddrType::A32);
self.set_mem_type(73..76, op.access.mem_type);
assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
self.set_bit(87, false); /* !.ZD - Returns a predicate? */
}
@ -1333,6 +1351,7 @@ impl SM75Instr {
assert!(op.access.addr_type == MemAddrType::A32);
self.set_mem_type(73..76, op.access.mem_type);
assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
}
fn encode_sts(&mut self, op: &OpSt) {
@ -1345,6 +1364,7 @@ impl SM75Instr {
assert!(op.access.addr_type == MemAddrType::A32);
self.set_mem_type(73..76, op.access.mem_type);
assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
}
fn encode_st(&mut self, op: &OpSt) {
@ -1419,6 +1439,7 @@ impl SM75Instr {
self.set_atom_type(73..76, op.atom_type);
self.set_mem_order(&op.mem_order);
self.set_eviction_priority(&op.mem_eviction_priority);
}
fn encode_atoms(&mut self, op: &OpAtom) {
@ -1441,6 +1462,7 @@ impl SM75Instr {
assert!(op.addr_type == MemAddrType::A32);
assert!(op.mem_order == MemOrder::Strong(MemScope::CTA));
assert!(op.mem_eviction_priority == MemEvictionPriority::Normal);
self.set_atom_type(73..76, op.atom_type);
}

View file

@ -1433,6 +1433,7 @@ impl<'a> ShaderFromNir<'a> {
atom_type: atom_type,
image_dim: dim,
mem_order: MemOrder::Strong(MemScope::System),
mem_eviction_priority: MemEvictionPriority::Normal,
});
self.set_dst(&intrin.def, dst);
}
@ -1453,6 +1454,7 @@ impl<'a> ShaderFromNir<'a> {
resident: Dst::None,
image_dim: dim,
mem_order: MemOrder::Strong(MemScope::System),
mem_eviction_priority: MemEvictionPriority::Normal,
mask: (1 << comps) - 1,
handle: handle,
coord: coord,
@ -1473,6 +1475,7 @@ impl<'a> ShaderFromNir<'a> {
b.push_op(OpSuSt {
image_dim: dim,
mem_order: MemOrder::Strong(MemScope::System),
mem_eviction_priority: MemEvictionPriority::Normal,
mask: (1 << comps) - 1,
handle: handle,
coord: coord,
@ -1529,6 +1532,7 @@ impl<'a> ShaderFromNir<'a> {
addr_offset: offset,
mem_space: MemSpace::Global,
mem_order: MemOrder::Strong(MemScope::System),
mem_eviction_priority: MemEvictionPriority::Normal,
});
self.set_dst(&intrin.def, dst);
}
@ -1554,6 +1558,7 @@ impl<'a> ShaderFromNir<'a> {
addr_offset: offset,
mem_space: MemSpace::Global,
mem_order: MemOrder::Strong(MemScope::System),
mem_eviction_priority: MemEvictionPriority::Normal,
});
self.set_dst(&intrin.def, dst);
}
@ -1634,6 +1639,7 @@ impl<'a> ShaderFromNir<'a> {
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Global,
order: order,
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 32);
let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
@ -1721,6 +1727,7 @@ impl<'a> ShaderFromNir<'a> {
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Local,
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
@ -1742,6 +1749,7 @@ impl<'a> ShaderFromNir<'a> {
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Shared,
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let offset = offset + intrin.base();
@ -1900,6 +1908,7 @@ impl<'a> ShaderFromNir<'a> {
addr_offset: offset,
mem_space: MemSpace::Shared,
mem_order: MemOrder::Strong(MemScope::CTA),
mem_eviction_priority: MemEvictionPriority::Normal,
});
self.set_dst(&intrin.def, dst);
}
@ -1925,6 +1934,7 @@ impl<'a> ShaderFromNir<'a> {
addr_offset: offset,
mem_space: MemSpace::Shared,
mem_order: MemOrder::Strong(MemScope::CTA),
mem_eviction_priority: MemEvictionPriority::Normal,
});
self.set_dst(&intrin.def, dst);
}
@ -1938,6 +1948,7 @@ impl<'a> ShaderFromNir<'a> {
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Global,
order: MemOrder::Strong(MemScope::System),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 32);
@ -1974,6 +1985,7 @@ impl<'a> ShaderFromNir<'a> {
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Local,
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
@ -1994,6 +2006,7 @@ impl<'a> ShaderFromNir<'a> {
mem_type: MemType::from_size(size_B, false),
space: MemSpace::Shared,
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
let offset = offset + intrin.base();

View file

@ -1798,19 +1798,45 @@ impl fmt::Display for MemSpace {
}
}
/// Cache eviction priority attached to a memory operation.
///
/// When displayed, each variant yields an instruction-modifier suffix;
/// `Normal` yields nothing at all.
// NOTE(review): every construction site visible alongside this definition
// uses `Normal`, which is presumably why dead-code warnings are silenced
// for the remaining variants — confirm.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemEvictionPriority {
    /// Displayed as `.EF`.
    First,
    /// Displayed as the empty string.
    Normal,
    /// Displayed as `.EL`.
    Last,
    /// Displayed as `.LU`.
    Unchanged,
}

impl fmt::Display for MemEvictionPriority {
    /// Writes the modifier suffix for this priority, or nothing for `Normal`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            // `Normal` has no suffix: return without touching the formatter.
            MemEvictionPriority::Normal => return Ok(()),
            MemEvictionPriority::First => ".EF",
            MemEvictionPriority::Last => ".EL",
            MemEvictionPriority::Unchanged => ".LU",
        };
        f.write_str(suffix)
    }
}
/// Bundle of attributes describing one memory access.
#[derive(Clone)]
pub struct MemAccess {
    /// Address type of the access (e.g. `MemAddrType::A32`).
    pub addr_type: MemAddrType,
    /// Memory type of the access.
    // NOTE(review): callers visible in this change build it with
    // `MemType::from_size`, so it presumably reflects the access size — confirm.
    pub mem_type: MemType,
    /// Memory space being accessed (`MemSpace::Global`, `Local`, `Shared`, ...).
    pub space: MemSpace,
    /// Memory ordering of the access (e.g. `MemOrder::Strong(MemScope::CTA)`).
    pub order: MemOrder,
    /// Cache eviction priority requested for the access.
    pub eviction_priority: MemEvictionPriority,
}
impl fmt::Display for MemAccess {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}.{}.{}.{}",
self.addr_type, self.mem_type, self.space, self.order
"{}.{}.{}.{}{}",
self.addr_type,
self.mem_type,
self.space,
self.order,
self.eviction_priority
)
}
}
@ -3045,6 +3071,7 @@ pub struct OpSuLd {
pub image_dim: ImageDim,
pub mem_order: MemOrder,
pub mem_eviction_priority: MemEvictionPriority,
pub mask: u8,
#[src_type(GPR)]
@ -3058,9 +3085,10 @@ impl fmt::Display for OpSuLd {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"SULD.P.{}.{} {{ {} {} }} [{}] {}",
"SULD.P.{}.{}{} {{ {} {} }} [{}] {}",
self.image_dim,
self.mem_order,
self.mem_eviction_priority,
self.dst,
self.resident,
self.coord,
@ -3074,6 +3102,7 @@ impl fmt::Display for OpSuLd {
pub struct OpSuSt {
pub image_dim: ImageDim,
pub mem_order: MemOrder,
pub mem_eviction_priority: MemEvictionPriority,
pub mask: u8,
#[src_type(GPR)]
@ -3090,8 +3119,13 @@ impl fmt::Display for OpSuSt {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"SUST.P.{}.{} [{}] {} {}",
self.image_dim, self.mem_order, self.coord, self.data, self.handle,
"SUST.P.{}.{}{} [{}] {} {}",
self.image_dim,
self.mem_order,
self.mem_eviction_priority,
self.coord,
self.data,
self.handle,
)
}
}
@ -3108,6 +3142,7 @@ pub struct OpSuAtom {
pub atom_type: AtomType,
pub mem_order: MemOrder,
pub mem_eviction_priority: MemEvictionPriority,
#[src_type(GPR)]
pub handle: Src,
@ -3123,11 +3158,12 @@ impl fmt::Display for OpSuAtom {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"SUATOM.P.{}.{}.{}.{} [{}] {} {}",
"SUATOM.P.{}.{}.{}.{}{} [{}] {} {}",
self.image_dim,
self.atom_op,
self.atom_type,
self.mem_order,
self.mem_eviction_priority,
self.coord,
self.data,
self.handle,
@ -3233,14 +3269,19 @@ pub struct OpAtom {
pub mem_space: MemSpace,
pub mem_order: MemOrder,
pub mem_eviction_priority: MemEvictionPriority,
}
impl fmt::Display for OpAtom {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"ATOM.{}.{}.{} {}",
self.atom_op, self.atom_type, self.mem_order, self.dst
"ATOM.{}.{}.{}{} {}",
self.atom_op,
self.atom_type,
self.mem_order,
self.mem_eviction_priority,
self.dst
)?;
write!(f, " [")?;
if !self.addr.is_zero() {

View file

@ -49,6 +49,7 @@ impl LowerCopySwap {
mem_type: MemType::B32,
space: MemSpace::Local,
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let addr = self.slm_start + src_reg.base_idx() * 4;
self.slm_size = max(self.slm_size, addr + 4);
@ -104,6 +105,7 @@ impl LowerCopySwap {
mem_type: MemType::B32,
space: MemSpace::Local,
order: MemOrder::Strong(MemScope::CTA),
eviction_priority: MemEvictionPriority::Normal,
};
let addr = self.slm_start + dst_reg.base_idx() * 4;
self.slm_size = max(self.slm_size, addr + 4);