Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[WASM] Expand memcmp for small size
  • Loading branch information
badumbatish committed Jul 11, 2025
commit d5103f62e28173fff2c720a64bbe32a7db9cdd22
4 changes: 4 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
: TargetLowering(TM), Subtarget(&STI) {
auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

// Set the load count for memcmp expand optimization
MaxLoadsPerMemcmp = 3;
MaxLoadsPerMemcmpOptSize = 2;
Comment thread
badumbatish marked this conversation as resolved.
Outdated

// Booleans always contain 0 or 1.
setBooleanContents(ZeroOrOneBooleanContent);
// Except in SIMD vectors
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,16 @@ InstructionCost WebAssemblyTTIImpl::getCastInstrCost(
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}

/// Build the memcmp-expansion options reported for the WebAssembly target.
///
/// \param OptSize   forwarded to TLI->getMaxExpandSizeMemcmp() to pick the
///                  load budget.
/// \param IsZeroCmp not consulted by this implementation; the same options
///                  are returned regardless of its value.
/// \returns an options struct with NumLoadsPerBlock, LoadSizes and
///          MaxNumLoads populated; all other fields keep their defaults.
WebAssemblyTTIImpl::TTI::MemCmpExpansionOptions
WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
TTI::MemCmpExpansionOptions Options;
Comment thread
badumbatish marked this conversation as resolved.
// NOTE: 2 is a deliberately conservative choice; what the ideal value should
// depend on has not been established yet.
Options.NumLoadsPerBlock = 2;
Comment thread
badumbatish marked this conversation as resolved.
Outdated
// Candidate load widths, listed widest first (presumably in bytes, i.e.
// i64/i32/i16/i8 loads -- TODO confirm against MemCmpExpansionOptions).
Options.LoadSizes.append({8, 4, 2, 1});
Comment thread
badumbatish marked this conversation as resolved.
// The overall load budget comes from the target lowering info, keyed on
// whether we are optimizing for size (presumably the MaxLoadsPerMemcmp /
// MaxLoadsPerMemcmpOptSize values set in the lowering constructor -- verify).
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
return Options;
}

InstructionCost WebAssemblyTTIImpl::getMemoryOpCost(
unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;

/// Returns the TTI::MemCmpExpansionOptions for this target, overriding the
/// base-class hook. \p OptSize and \p IsZeroCmp are supplied by the caller.
TTI::MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;

InstructionCost getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
Expand Down
88 changes: 72 additions & 16 deletions llvm/test/CodeGen/WebAssembly/memcmp-expand.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,21 @@ define i1 @memcmp_expand_3(ptr %a, ptr %b) {
; CHECK-LABEL: memcmp_expand_3:
; CHECK: .functype memcmp_expand_3 (i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const $push0=, 3
; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0
; CHECK-NEXT: i32.eqz $push2=, $pop1
; CHECK-NEXT: return $pop2
; CHECK-NEXT: i32.load16_u $push7=, 0($0):p2align=0
; CHECK-NEXT: i32.load16_u $push6=, 0($1):p2align=0
; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
; CHECK-NEXT: i32.const $push0=, 2
; CHECK-NEXT: i32.add $push3=, $0, $pop0
; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
; CHECK-NEXT: i32.const $push13=, 2
; CHECK-NEXT: i32.add $push1=, $1, $pop13
; CHECK-NEXT: i32.load8_u $push2=, 0($pop1)
; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
; CHECK-NEXT: i32.const $push10=, 65535
; CHECK-NEXT: i32.and $push11=, $pop9, $pop10
; CHECK-NEXT: i32.eqz $push12=, $pop11
; CHECK-NEXT: return $pop12
%cmp_3 = call i32 @memcmp(ptr %a, ptr %b, i32 3)
%res = icmp eq i32 %cmp_3, 0
ret i1 %res
Expand All @@ -22,10 +33,19 @@ define i1 @memcmp_expand_5(ptr %a, ptr %b) {
; CHECK-LABEL: memcmp_expand_5:
; CHECK: .functype memcmp_expand_5 (i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const $push0=, 5
; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0
; CHECK-NEXT: i32.eqz $push2=, $pop1
; CHECK-NEXT: return $pop2
; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0
; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0
; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
; CHECK-NEXT: i32.const $push0=, 4
; CHECK-NEXT: i32.add $push3=, $0, $pop0
; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
; CHECK-NEXT: i32.const $push11=, 4
; CHECK-NEXT: i32.add $push1=, $1, $pop11
; CHECK-NEXT: i32.load8_u $push2=, 0($pop1)
; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
; CHECK-NEXT: i32.eqz $push10=, $pop9
; CHECK-NEXT: return $pop10
%cmp_5 = call i32 @memcmp(ptr %a, ptr %b, i32 5)
%res = icmp eq i32 %cmp_5, 0
ret i1 %res
Expand All @@ -35,10 +55,37 @@ define i1 @memcmp_expand_7(ptr %a, ptr %b) {
; CHECK-LABEL: memcmp_expand_7:
; CHECK: .functype memcmp_expand_7 (i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const $push0=, 7
; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0
; CHECK-NEXT: i32.eqz $push2=, $pop1
; CHECK-NEXT: return $pop2
; CHECK-NEXT: block
; CHECK-NEXT: block
; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0
; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0
; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6
; CHECK-NEXT: i32.const $push0=, 4
; CHECK-NEXT: i32.add $push3=, $0, $pop0
; CHECK-NEXT: i32.load16_u $push4=, 0($pop3):p2align=0
; CHECK-NEXT: i32.const $push17=, 4
; CHECK-NEXT: i32.add $push1=, $1, $pop17
; CHECK-NEXT: i32.load16_u $push2=, 0($pop1):p2align=0
; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2
; CHECK-NEXT: i32.or $push9=, $pop8, $pop5
; CHECK-NEXT: br_if 0, $pop9 # 0: down to label1
; CHECK-NEXT: # %bb.1: # %loadbb1
; CHECK-NEXT: i32.const $2=, 0
; CHECK-NEXT: i32.const $push10=, 6
; CHECK-NEXT: i32.add $push13=, $0, $pop10
; CHECK-NEXT: i32.load8_u $push14=, 0($pop13)
; CHECK-NEXT: i32.const $push18=, 6
; CHECK-NEXT: i32.add $push11=, $1, $pop18
; CHECK-NEXT: i32.load8_u $push12=, 0($pop11)
; CHECK-NEXT: i32.eq $push15=, $pop14, $pop12
; CHECK-NEXT: br_if 1, $pop15 # 1: down to label0
; CHECK-NEXT: .LBB2_2: # %res_block
; CHECK-NEXT: end_block # label1:
; CHECK-NEXT: i32.const $2=, 1
; CHECK-NEXT: .LBB2_3: # %endblock
; CHECK-NEXT: end_block # label0:
; CHECK-NEXT: i32.eqz $push16=, $2
; CHECK-NEXT: return $pop16
%cmp_7 = call i32 @memcmp(ptr %a, ptr %b, i32 7)
%res = icmp eq i32 %cmp_7, 0
ret i1 %res
Expand Down Expand Up @@ -76,10 +123,19 @@ define i1 @memcmp_expand_16(ptr %a, ptr %b) {
; CHECK-LABEL: memcmp_expand_16:
; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const $push0=, 16
; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0
; CHECK-NEXT: i32.eqz $push2=, $pop1
; CHECK-NEXT: return $pop2
; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0
; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0
; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6
; CHECK-NEXT: i32.const $push0=, 8
; CHECK-NEXT: i32.add $push3=, $0, $pop0
; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0
; CHECK-NEXT: i32.const $push11=, 8
; CHECK-NEXT: i32.add $push1=, $1, $pop11
; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0
; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2
; CHECK-NEXT: i64.or $push9=, $pop8, $pop5
; CHECK-NEXT: i64.eqz $push10=, $pop9
; CHECK-NEXT: return $pop10
%cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16)
%res = icmp eq i32 %cmp_16, 0
ret i1 %res
Expand Down
Loading