-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[BOLT] Optimize the codegen of createLoadImmediate for AArch64. #137413
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-bolt Author: Rodrigo Rocha (rcorcs) Changes: The code generation of createLoadImmediate for AArch64 always emitted 4 instructions, regardless of the immediate value being loaded into the 64-bit register. This patch makes sure that only the necessary number of instructions is used, depending on the value of the immediate being loaded into the register (ranging from 1 to 4 instructions). Full diff: https://github.com/llvm/llvm-project/pull/137413.diff 2 Files Affected:
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index e00d6a18b0f6c..0aa9504f50a15 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2173,14 +2173,26 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
InstructionListType createLoadImmediate(const MCPhysReg Dest,
uint64_t Imm) const override {
- InstructionListType Insts(4);
- int Shift = 48;
- for (int I = 0; I < 4; I++, Shift -= 16) {
- Insts[I].setOpcode(AArch64::MOVKXi);
- Insts[I].addOperand(MCOperand::createReg(Dest));
- Insts[I].addOperand(MCOperand::createReg(Dest));
- Insts[I].addOperand(MCOperand::createImm((Imm >> Shift) & 0xFFFF));
- Insts[I].addOperand(MCOperand::createImm(Shift));
+ InstructionListType Insts;
+ for (int I = 0, Shift = 0; I < 4; I++, Shift += 16) {
+ uint16_t HalfWord = (Imm >> Shift) & 0xFFFF;
+ if (!HalfWord)
+ continue;
+ MCInst Inst;
+ if (Insts.size() == 0) {
+ Inst.setOpcode(AArch64::MOVZXi);
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createImm(HalfWord));
+ Inst.addOperand(MCOperand::createImm(Shift));
+ Insts.push_back(Inst);
+ } else {
+ Inst.setOpcode(AArch64::MOVKXi);
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createImm(HalfWord));
+ Inst.addOperand(MCOperand::createImm(Shift));
+ Insts.push_back(Inst);
+ }
}
return Insts;
}
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index 7016dec0e3574..ac0529cb09a7b 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -167,6 +167,90 @@ TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
ASSERT_EQ(Label, BB->getLabel());
}
+TEST_P(MCPlusBuilderTester, AArch64_LoadImm32) {
+ // The expectations below are AArch64 opcodes, so skip every other target.
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+ BinaryFunction *Func = BC->createInjectedBinaryFunction("BF", true);
+ std::unique_ptr<BinaryBasicBlock> Block = Func->createBasicBlock();
+
+ // Load an immediate whose only non-zero half-word is the lowest one.
+ InstructionListType Loads = BC->MIB->createLoadImmediate(AArch64::X0, 2);
+ Block->addInstructions(Loads.begin(), Loads.end());
+
+ // Expected sequence: mov x0, #2
+ ASSERT_EQ(Block->size(), 1);
+ auto Inst = Block->begin();
+ ASSERT_EQ(Inst->getOpcode(), AArch64::MOVZXi);
+ ASSERT_EQ(Inst->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(Inst->getOperand(1).getImm(), 2);
+ ASSERT_EQ(Inst->getOperand(2).getImm(), 0);
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_LoadImm64) {
+ // The expectations below are AArch64 opcodes, so skip every other target.
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+ BinaryFunction *Func = BC->createInjectedBinaryFunction("BF", true);
+ std::unique_ptr<BinaryBasicBlock> Block = Func->createBasicBlock();
+
+ // Half-words from lowest to highest are 1, 2, 3, 4: all four are non-zero.
+ int64_t Value = (uint64_t{4} << 48) | (uint64_t{3} << 32) | (2 << 16) | 1;
+ InstructionListType Loads =
+ BC->MIB->createLoadImmediate(AArch64::X0, Value);
+ Block->addInstructions(Loads.begin(), Loads.end());
+
+ ASSERT_EQ(Block->size(), 4);
+ auto Inst = Block->begin();
+ // mov x0, #1
+ ASSERT_EQ(Inst->getOpcode(), AArch64::MOVZXi);
+ ASSERT_EQ(Inst->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(Inst->getOperand(1).getImm(), 1);
+ ASSERT_EQ(Inst->getOperand(2).getImm(), 0);
+ // movk x0, #2, lsl #16
+ // movk x0, #3, lsl #32
+ // movk x0, #4, lsl #48
+ for (int Idx = 1; Idx < 4; ++Idx) {
+ ++Inst;
+ ASSERT_EQ(Inst->getOpcode(), AArch64::MOVKXi);
+ ASSERT_EQ(Inst->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(Inst->getOperand(1).getReg(), AArch64::X0);
+ ASSERT_EQ(Inst->getOperand(2).getImm(), Idx + 1);
+ ASSERT_EQ(Inst->getOperand(3).getImm(), 16 * Idx);
+ }
+}
+
+TEST_P(MCPlusBuilderTester, AArch64_LoadImm64Partial) {
+ // The expectations below are AArch64 opcodes, so skip every other target.
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+ BinaryFunction *Func = BC->createInjectedBinaryFunction("BF", true);
+ std::unique_ptr<BinaryBasicBlock> Block = Func->createBasicBlock();
+
+ // Only the half-words at bit 16 and bit 48 are non-zero.
+ int64_t Value = (uint64_t{4} << 48) | (2 << 16);
+ InstructionListType Loads =
+ BC->MIB->createLoadImmediate(AArch64::X0, Value);
+ Block->addInstructions(Loads.begin(), Loads.end());
+
+ ASSERT_EQ(Block->size(), 2);
+ auto Inst = Block->begin();
+ // mov x0, #2, lsl #16
+ ASSERT_EQ(Inst->getOpcode(), AArch64::MOVZXi);
+ ASSERT_EQ(Inst->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(Inst->getOperand(1).getImm(), 2);
+ ASSERT_EQ(Inst->getOperand(2).getImm(), 16);
+ ++Inst;
+ // movk x0, #4, lsl #48
+ ASSERT_EQ(Inst->getOpcode(), AArch64::MOVKXi);
+ ASSERT_EQ(Inst->getOperand(0).getReg(), AArch64::X0);
+ ASSERT_EQ(Inst->getOperand(1).getReg(), AArch64::X0);
+ ASSERT_EQ(Inst->getOperand(2).getImm(), 4);
+ ASSERT_EQ(Inst->getOperand(3).getImm(), 48);
+}
+
TEST_P(MCPlusBuilderTester, testAccessedRegsImplicitDef) {
if (GetParam() != Triple::aarch64)
GTEST_SKIP();
|
Hi Rodrigo, thanks for improving the codegen. While looking at LLVM AArch64 backend, I've noticed that there's |
The code generation of createLoadImmediate for AArch64 always emitted 4 instructions, regardless of the immediate value being loaded into the 64-bit register. This patch makes sure that only the necessary number of instructions is used, depending on the value of the immediate being loaded into the register (ranging from 1 to 4 instructions).
The new unit tests help us verify this capability.