Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 26475f5

Browse files
authored
[AArch64] Refactor @plt, @gotpcrel, and @AUTH to use parseDataExpr
Following PR llvm#132569 (RISC-V), which added `parseDataExpr` for parsing expressions in data directives (e.g., `.word`), this PR migrates AArch64 `@plt`, `@gotpcrel`, and `@AUTH` from the `parsePrimaryExpr` workaround to `parseDataExpr`. The goal is to align with the GNU assembler model, where relocation specifiers apply to the entire operand rather than individual terms, reducing complexity — especially evident in `@AUTH` parsing. Note: AArch64 ELF lacks an official syntax for data directives (llvm#132570). A prefix notation might be a preferable future direction. I recommend `%specifier(expr)`. AsmParser's `@specifier` parsing is suboptimal, necessitating lexer workarounds. `@` might appear multiple times in an operand. We should not use `@` beyond the existing AArch64 Mach-O instruction operands. In the test `elf-reloc-ptrauth.s`, many errors are now reported at parse time. Pull Request: llvm#134202
1 parent bb7ff13 commit 26475f5

File tree

7 files changed

+144
-130
lines changed

7 files changed

+144
-130
lines changed

llvm/include/llvm/MC/MCParser/MCAsmParser.h

+3
Original file line number | Diff line number | Diff line change
@@ -332,6 +332,9 @@ class MCAsmParser {
332332

333333
/// Parse a .gnu_attribute.
334334
bool parseGNUAttribute(SMLoc L, int64_t &Tag, int64_t &IntegerValue);
335+
336+
bool parseAtSpecifier(const MCExpr *&Res, SMLoc &EndLoc);
337+
const MCExpr *applySpecifier(const MCExpr *E, uint32_t Variant);
335338
};
336339

337340
/// Create an MCAsmParser instance for parsing assembly similar to gas syntax

llvm/lib/MC/MCParser/AsmParser.cpp

+20-6
Original file line number | Diff line number | Diff line change
@@ -670,8 +670,6 @@ class AsmParser : public MCAsmParser {
670670
bool parseEscapedString(std::string &Data) override;
671671
bool parseAngleBracketString(std::string &Data) override;
672672

673-
const MCExpr *applySpecifier(const MCExpr *E, uint32_t Variant);
674-
675673
// Macro-like directives
676674
MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
677675
void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
@@ -1193,7 +1191,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
11931191

11941192
Split = std::make_pair(Identifier, VName);
11951193
}
1196-
} else {
1194+
} else if (Lexer.getAllowAtInIdentifier()) {
11971195
Split = Identifier.split('@');
11981196
}
11991197
} else if (MAI.useParensForSpecifier() &&
@@ -1341,7 +1339,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res) {
13411339
return parseExpression(Res, EndLoc);
13421340
}
13431341

1344-
const MCExpr *AsmParser::applySpecifier(const MCExpr *E, uint32_t Spec) {
1342+
const MCExpr *MCAsmParser::applySpecifier(const MCExpr *E, uint32_t Spec) {
13451343
// Ask the target implementation about this expression first.
13461344
const MCExpr *NewE = getTargetParser().applySpecifier(E, Spec, Ctx);
13471345
if (NewE)
@@ -1432,6 +1430,23 @@ static std::string angleBracketString(StringRef AltMacroStr) {
14321430
return Res;
14331431
}
14341432

1433+
bool MCAsmParser::parseAtSpecifier(const MCExpr *&Res, SMLoc &EndLoc) {
1434+
if (parseOptionalToken(AsmToken::At)) {
1435+
if (getLexer().isNot(AsmToken::Identifier))
1436+
return TokError("expected specifier following '@'");
1437+
1438+
auto Spec = MAI.getSpecifierForName(getTok().getIdentifier());
1439+
if (!Spec)
1440+
return TokError("invalid specifier '@" + getTok().getIdentifier() + "'");
1441+
1442+
const MCExpr *ModifiedRes = applySpecifier(Res, *Spec);
1443+
if (ModifiedRes)
1444+
Res = ModifiedRes;
1445+
Lex();
1446+
}
1447+
return false;
1448+
}
1449+
14351450
/// Parse an expression and return it.
14361451
///
14371452
/// expr ::= expr &&,|| expr -> lowest.
@@ -1452,8 +1467,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
14521467
// As a special case, we support 'a op b @ modifier' by rewriting the
14531468
// expression to include the modifier. This is inefficient, but in general we
14541469
// expect users to use 'a@modifier op b'.
1455-
if (Ctx.getAsmInfo()->useAtForSpecifier() &&
1456-
parseOptionalToken(AsmToken::At)) {
1470+
if (Lexer.getAllowAtInIdentifier() && parseOptionalToken(AsmToken::At)) {
14571471
if (Lexer.isNot(AsmToken::Identifier))
14581472
return TokError("unexpected symbol modifier following '@'");
14591473

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

+73-54
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/ADT/StringRef.h"
2626
#include "llvm/ADT/StringSwitch.h"
2727
#include "llvm/ADT/Twine.h"
28+
#include "llvm/MC/MCAsmInfo.h"
2829
#include "llvm/MC/MCContext.h"
2930
#include "llvm/MC/MCExpr.h"
3031
#include "llvm/MC/MCInst.h"
@@ -180,6 +181,7 @@ class AArch64AsmParser : public MCTargetAsmParser {
180181
bool showMatchError(SMLoc Loc, unsigned ErrCode, uint64_t ErrorInfo,
181182
OperandVector &Operands);
182183

184+
bool parseDataExpr(const MCExpr *&Res) override;
183185
bool parseAuthExpr(const MCExpr *&Res, SMLoc &EndLoc);
184186

185187
bool parseDirectiveArch(SMLoc L);
@@ -335,8 +337,6 @@ class AArch64AsmParser : public MCTargetAsmParser {
335337
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
336338
unsigned Kind) override;
337339

338-
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
339-
340340
static bool classifySymbolRef(const MCExpr *Expr,
341341
AArch64MCExpr::Specifier &ELFSpec,
342342
AArch64MCExpr::Specifier &DarwinSpec,
@@ -4478,6 +4478,19 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
44784478
if (HasELFModifier)
44794479
ImmVal = AArch64MCExpr::create(ImmVal, RefKind, getContext());
44804480

4481+
SMLoc EndLoc;
4482+
if (getContext().getAsmInfo()->hasSubsectionsViaSymbols()) {
4483+
if (getParser().parseAtSpecifier(ImmVal, EndLoc))
4484+
return true;
4485+
const MCExpr *Term;
4486+
if (parseOptionalToken(AsmToken::Plus)) {
4487+
if (getParser().parseExpression(Term, EndLoc))
4488+
return true;
4489+
ImmVal =
4490+
MCBinaryExpr::create(MCBinaryExpr::Add, ImmVal, Term, getContext());
4491+
}
4492+
}
4493+
44814494
return false;
44824495
}
44834496

@@ -5007,11 +5020,18 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
50075020

50085021
// This was not a register so parse other operands that start with an
50095022
// identifier (like labels) as expressions and create them as immediates.
5010-
const MCExpr *IdVal;
5023+
const MCExpr *IdVal, *Term;
50115024
S = getLoc();
50125025
if (getParser().parseExpression(IdVal))
50135026
return true;
5014-
E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
5027+
if (getParser().parseAtSpecifier(IdVal, E))
5028+
return true;
5029+
if (parseOptionalToken(AsmToken::Plus)) {
5030+
if (getParser().parseExpression(Term, E))
5031+
return true;
5032+
IdVal =
5033+
MCBinaryExpr::create(MCBinaryExpr::Add, IdVal, Term, getContext());
5034+
}
50155035
Operands.push_back(AArch64Operand::CreateImm(IdVal, S, E, getContext()));
50165036

50175037
// Parse an optional shift/extend modifier.
@@ -8086,11 +8106,56 @@ bool AArch64AsmParser::parseDirectiveAeabiAArch64Attr(SMLoc L) {
80868106
return false;
80878107
}
80888108

8089-
bool AArch64AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8090-
// Try @AUTH expressions: they're more complex than the usual symbol variants.
8091-
if (!parseAuthExpr(Res, EndLoc))
8109+
bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) {
8110+
SMLoc EndLoc;
8111+
8112+
if (getParser().parseExpression(Res))
8113+
return true;
8114+
MCAsmParser &Parser = getParser();
8115+
if (!parseOptionalToken(AsmToken::At))
80928116
return false;
8093-
return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8117+
if (getLexer().getKind() != AsmToken::Identifier)
8118+
return Error(getLoc(), "expected relocation specifier");
8119+
8120+
std::string Identifier = Parser.getTok().getIdentifier().lower();
8121+
SMLoc Loc = getLoc();
8122+
Lex();
8123+
if (Identifier == "auth")
8124+
return parseAuthExpr(Res, EndLoc);
8125+
8126+
auto Spec = AArch64MCExpr::None;
8127+
if (STI->getTargetTriple().isOSBinFormatMachO()) {
8128+
if (Identifier == "got")
8129+
Spec = AArch64MCExpr::M_GOT;
8130+
} else {
8131+
// Unofficial, experimental syntax that will be changed.
8132+
if (Identifier == "gotpcrel")
8133+
Spec = AArch64MCExpr::VK_GOTPCREL;
8134+
else if (Identifier == "plt")
8135+
Spec = AArch64MCExpr::VK_PLT;
8136+
}
8137+
if (Spec == AArch64MCExpr::None)
8138+
return Error(Loc, "invalid relocation specifier");
8139+
if (auto *SRE = dyn_cast<MCSymbolRefExpr>(Res))
8140+
Res = MCSymbolRefExpr::create(&SRE->getSymbol(), Spec, getContext(),
8141+
SRE->getLoc());
8142+
else
8143+
return Error(Loc, "@ specifier only allowed after a symbol");
8144+
8145+
for (;;) {
8146+
std::optional<MCBinaryExpr::Opcode> Opcode;
8147+
if (parseOptionalToken(AsmToken::Plus))
8148+
Opcode = MCBinaryExpr::Add;
8149+
else if (parseOptionalToken(AsmToken::Minus))
8150+
Opcode = MCBinaryExpr::Sub;
8151+
else
8152+
break;
8153+
const MCExpr *Term;
8154+
if (getParser().parsePrimaryExpr(Term, EndLoc, nullptr))
8155+
return true;
8156+
Res = MCBinaryExpr::create(*Opcode, Res, Term, getContext());
8157+
}
8158+
return false;
80948159
}
80958160

80968161
/// parseAuthExpr
@@ -8100,54 +8165,8 @@ bool AArch64AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
81008165
bool AArch64AsmParser::parseAuthExpr(const MCExpr *&Res, SMLoc &EndLoc) {
81018166
MCAsmParser &Parser = getParser();
81028167
MCContext &Ctx = getContext();
8103-
81048168
AsmToken Tok = Parser.getTok();
81058169

8106-
// Look for '_sym@AUTH' ...
8107-
if (Tok.is(AsmToken::Identifier) && Tok.getIdentifier().ends_with("@AUTH")) {
8108-
StringRef SymName = Tok.getIdentifier().drop_back(strlen("@AUTH"));
8109-
if (SymName.contains('@'))
8110-
return TokError(
8111-
"combination of @AUTH with other modifiers not supported");
8112-
Res = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(SymName), Ctx);
8113-
8114-
Parser.Lex(); // Eat the identifier.
8115-
} else {
8116-
// ... or look for a more complex symbol reference, such as ...
8117-
SmallVector<AsmToken, 6> Tokens;
8118-
8119-
// ... '"_long sym"@AUTH' ...
8120-
if (Tok.is(AsmToken::String))
8121-
Tokens.resize(2);
8122-
// ... or '(_sym + 5)@AUTH'.
8123-
else if (Tok.is(AsmToken::LParen))
8124-
Tokens.resize(6);
8125-
else
8126-
return true;
8127-
8128-
if (Parser.getLexer().peekTokens(Tokens) != Tokens.size())
8129-
return true;
8130-
8131-
// In either case, the expression ends with '@' 'AUTH'.
8132-
if (Tokens[Tokens.size() - 2].isNot(AsmToken::At) ||
8133-
Tokens[Tokens.size() - 1].isNot(AsmToken::Identifier) ||
8134-
Tokens[Tokens.size() - 1].getIdentifier() != "AUTH")
8135-
return true;
8136-
8137-
if (Tok.is(AsmToken::String)) {
8138-
StringRef SymName;
8139-
if (Parser.parseIdentifier(SymName))
8140-
return true;
8141-
Res = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(SymName), Ctx);
8142-
} else {
8143-
if (Parser.parsePrimaryExpr(Res, EndLoc, nullptr))
8144-
return true;
8145-
}
8146-
8147-
Parser.Lex(); // '@'
8148-
Parser.Lex(); // 'AUTH'
8149-
}
8150-
81518170
// At this point, we encountered "<id>@AUTH". There is no fallback anymore.
81528171
if (parseToken(AsmToken::LParen, "expected '('"))
81538172
return true;

llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) {
7070
UsesELFSectionDirectiveForBSS = true;
7171
SupportsDebugInformation = true;
7272
UseDataRegionDirectives = true;
73+
UseAtForSpecifier = false;
7374

7475
ExceptionsType = ExceptionHandling::DwarfCFI;
7576

@@ -114,6 +115,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
114115
Data64bitsDirective = "\t.xword\t";
115116

116117
UseDataRegionDirectives = false;
118+
UseAtForSpecifier = false;
117119

118120
WeakRefDirective = "\t.weak\t";
119121

llvm/test/MC/AArch64/data-directive-specifier.s

+9-7
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# RUN: llvm-mc -triple=aarch64 -filetype=obj %s | llvm-readobj -r - | FileCheck %s
2-
# RUN: not llvm-mc -triple=aarch64 -filetype=obj %s --defsym ERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
2+
# RUN: not llvm-mc -triple=aarch64 %s --defsym ERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
33
# RUN: not llvm-mc -triple=aarch64 -filetype=obj %s --defsym OBJERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=OBJERR --implicit-check-not=error:
44

55
.globl g
@@ -34,19 +34,21 @@ data1:
3434

3535
## Test parse-time errors
3636
.ifdef ERR
37-
# ERR: [[#@LINE+1]]:14: error: invalid variant 'pageoff'
38-
.word extern@pageoff
37+
# ERR: [[#@LINE+1]]:9: error: @ specifier only allowed after a symbol
38+
.quad 3@plt - .
39+
40+
# ERR: [[#@LINE+1]]:9: error: expected ')'
41+
.quad (l@plt - .)
3942
.endif
4043

41-
## Test assemble-time errors
4244
.ifdef OBJERR
43-
# OBJERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
44-
.word extern@plt - und
45-
4645
.quad g@plt - .
4746

4847
.word extern@gotpcrel - .
4948

49+
# OBJERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
50+
.word extern@plt - und
51+
5052
# OBJERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
5153
.word extern@gotpcrel - und
5254
.endif

0 commit comments

Comments
 (0)