-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[LLD][COFF] add __{data,bss}_{start,end}__ symbols for Cygwin support #136180
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-platform-windows @llvm/pr-subscribers-lld Author: None (jeremyd2019) Changes: Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. /cc @mstorsjo Full diff: https://github.com/llvm/llvm-project/pull/136180.diff 2 Files Affected:
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 7aa13bdce488e..72e25634c19d8 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2039,6 +2039,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
parseMerge(".ctors=.rdata");
parseMerge(".dtors=.rdata");
parseMerge(".CRT=.rdata");
+ parseMerge(".data_cygwin_nocopy=.data");
}
// Handle /section
@@ -2495,6 +2496,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (config->mingw) {
symtab.addAbsolute(symtab.mangle("__CTOR_LIST__"), 0);
symtab.addAbsolute(symtab.mangle("__DTOR_LIST__"), 0);
+ symtab.addAbsolute("__data_start__", 0);
+ symtab.addAbsolute("__data_end__", 0);
+ symtab.addAbsolute("__bss_start__", 0);
+ symtab.addAbsolute("__bss_end__", 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (symtab.findUnderscore("__buildid"))
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 6ed1f884a9636..32a480e3126e2 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -239,6 +239,7 @@ class Writer {
void createRuntimePseudoRelocs();
void createECChunks();
void insertCtorDtorSymbols();
+ void insertBssDataStartEndSymbols();
void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols);
void createGuardCFTables();
void markSymbolsForRVATable(ObjFile *file,
@@ -314,6 +315,7 @@ class Writer {
OutputSection *textSec;
OutputSection *hexpthkSec;
+ OutputSection *bssSec;
OutputSection *rdataSec;
OutputSection *buildidSec;
OutputSection *dataSec;
@@ -1077,7 +1079,7 @@ void Writer::createSections() {
textSec = createSection(".text", code | r | x);
if (isArm64EC(ctx.config.machine))
hexpthkSec = createSection(".hexpthk", code | r | x);
- createSection(".bss", bss | r | w);
+ bssSec = createSection(".bss", bss | r | w);
rdataSec = createSection(".rdata", data | r);
buildidSec = createSection(".buildid", data | r);
dataSec = createSection(".data", data | r | w);
@@ -1260,8 +1262,10 @@ void Writer::createMiscChunks() {
if (config->autoImport)
createRuntimePseudoRelocs();
- if (config->mingw)
+ if (config->mingw) {
insertCtorDtorSymbols();
+ insertBssDataStartEndSymbols();
+ }
}
// Create .idata section for the DLL-imported symbol table.
@@ -2369,6 +2373,31 @@ void Writer::insertCtorDtorSymbols() {
}
}
+// MinGW (really, Cygwin) specific.
+// The Cygwin startup code uses __data_start__ __data_end__ __bss_start__
+// and __bss_end__ to know what to copy during fork emulation.
+void Writer::insertBssDataStartEndSymbols() {
+ if (!dataSec->chunks.empty()) {
+ Symbol *dataStartSym = ctx.symtab.find("__data_start__");
+ Symbol *dataEndSym = ctx.symtab.find("__data_end__");
+ Chunk *endChunk = dataSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(dataStartSym, dataStartSym->getName(),
+ dataSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(dataEndSym, dataEndSym->getName(), endChunk,
+ endChunk->getSize());
+ }
+
+ if (!bssSec->chunks.empty()) {
+ Symbol *bssStartSym = ctx.symtab.find("__bss_start__");
+ Symbol *bssEndSym = ctx.symtab.find("__bss_end__");
+ Chunk *endChunk = bssSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(bssStartSym, bssStartSym->getName(),
+ bssSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(bssEndSym, bssEndSym->getName(), endChunk,
+ endChunk->getSize());
+ }
+}
+
// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() {
|
@llvm/pr-subscribers-lld-coff Author: None (jeremyd2019) Changes: Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. /cc @mstorsjo Full diff: https://github.com/llvm/llvm-project/pull/136180.diff 2 Files Affected:
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 7aa13bdce488e..72e25634c19d8 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2039,6 +2039,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
parseMerge(".ctors=.rdata");
parseMerge(".dtors=.rdata");
parseMerge(".CRT=.rdata");
+ parseMerge(".data_cygwin_nocopy=.data");
}
// Handle /section
@@ -2495,6 +2496,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (config->mingw) {
symtab.addAbsolute(symtab.mangle("__CTOR_LIST__"), 0);
symtab.addAbsolute(symtab.mangle("__DTOR_LIST__"), 0);
+ symtab.addAbsolute("__data_start__", 0);
+ symtab.addAbsolute("__data_end__", 0);
+ symtab.addAbsolute("__bss_start__", 0);
+ symtab.addAbsolute("__bss_end__", 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (symtab.findUnderscore("__buildid"))
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 6ed1f884a9636..32a480e3126e2 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -239,6 +239,7 @@ class Writer {
void createRuntimePseudoRelocs();
void createECChunks();
void insertCtorDtorSymbols();
+ void insertBssDataStartEndSymbols();
void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols);
void createGuardCFTables();
void markSymbolsForRVATable(ObjFile *file,
@@ -314,6 +315,7 @@ class Writer {
OutputSection *textSec;
OutputSection *hexpthkSec;
+ OutputSection *bssSec;
OutputSection *rdataSec;
OutputSection *buildidSec;
OutputSection *dataSec;
@@ -1077,7 +1079,7 @@ void Writer::createSections() {
textSec = createSection(".text", code | r | x);
if (isArm64EC(ctx.config.machine))
hexpthkSec = createSection(".hexpthk", code | r | x);
- createSection(".bss", bss | r | w);
+ bssSec = createSection(".bss", bss | r | w);
rdataSec = createSection(".rdata", data | r);
buildidSec = createSection(".buildid", data | r);
dataSec = createSection(".data", data | r | w);
@@ -1260,8 +1262,10 @@ void Writer::createMiscChunks() {
if (config->autoImport)
createRuntimePseudoRelocs();
- if (config->mingw)
+ if (config->mingw) {
insertCtorDtorSymbols();
+ insertBssDataStartEndSymbols();
+ }
}
// Create .idata section for the DLL-imported symbol table.
@@ -2369,6 +2373,31 @@ void Writer::insertCtorDtorSymbols() {
}
}
+// MinGW (really, Cygwin) specific.
+// The Cygwin startup code uses __data_start__ __data_end__ __bss_start__
+// and __bss_end__ to know what to copy during fork emulation.
+void Writer::insertBssDataStartEndSymbols() {
+ if (!dataSec->chunks.empty()) {
+ Symbol *dataStartSym = ctx.symtab.find("__data_start__");
+ Symbol *dataEndSym = ctx.symtab.find("__data_end__");
+ Chunk *endChunk = dataSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(dataStartSym, dataStartSym->getName(),
+ dataSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(dataEndSym, dataEndSym->getName(), endChunk,
+ endChunk->getSize());
+ }
+
+ if (!bssSec->chunks.empty()) {
+ Symbol *bssStartSym = ctx.symtab.find("__bss_start__");
+ Symbol *bssEndSym = ctx.symtab.find("__bss_end__");
+ Chunk *endChunk = bssSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(bssStartSym, bssStartSym->getName(),
+ bssSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(bssEndSym, bssEndSym->getName(), endChunk,
+ endChunk->getSize());
+ }
+}
+
// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() {
|
I don't have rights to push/merge. (I seem to remember being asked to mention this in the past) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks reasonable, but would need some sort of test; have a look in lld/test/COFF
for something to use as a model.
7b71533
to
05747de
Compare
Test added. Does that look right? I kind of expected |
Yes, that looks quite good, thanks!
Hmm, right - doesn't this mean that we end up including the BSS zero bytes in the actual linked file, while the normal idea is to order them at the end of |
I'll try it and see |
Doesn't seem to matter. The merges are stored in a |
I made an ugly hack to ensure $ diff -u /cygdrive/d/llvm-project/lld/test/COFF/cygwin-symbols.s lld/test/COFF/cygwin-symbols.s
--- /cygdrive/d/llvm-project/lld/test/COFF/cygwin-symbols.s 2025-04-23 13:24:02.769462400 -0700
+++ lld/test/COFF/cygwin-symbols.s 2025-04-23 13:22:16.017086100 -0700
@@ -79,7 +79,7 @@
# __data_end__ pointing at 0x140003009.
# DATANOBSS-NEXT: Contents of section .test:
# DATANOBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
-# DATANOBSS-NEXT: 140004010 0c300040 01000000 0c300040 01000000
+# DATANOBSS-NEXT: 140004010 18300040 01000000 18300040 01000000
# __bss_start__ pointing at 0x140003000 and
# __bss_end__ pointing at 0x140003009.
@@ -88,13 +88,13 @@
# BSSNODATA-NEXT: 140004010 00300040 01000000 09300040 01000000
# DATAANDBSS: Contents of section .data:
-# DATAANDBSS-NEXT: 140003000 01000000 00000000 02000000 00000000
-# DATAANDBSS-NEXT: 140003010 00000000 00000000 03000000 00000000
-# DATAANDBSS-NEXT: 140003020 04
+# DATAANDBSS-NEXT: 140003000 01000000 00000000 02000000 03000000
+# DATAANDBSS-NEXT: 140003010 00000000 04000000 00000000 00000000
+# DATAANDBSS-NEXT: 140003020 00
# __data_start__ pointing at 0x140003000 and
# __data_end__ pointing at 0x140003009.
-# __bss_start__ pointing at 0x14000300c and
-# __bss_end__ pointing at 0x140003015.
+# __bss_start__ pointing at 0x140003018 and
+# __bss_end__ pointing at 0x140003021.
# DATAANDBSS-NEXT: Contents of section .test:
# DATAANDBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
-# DATAANDBSS-NEXT: 140004010 0c300040 01000000 15300040 01000000
+# DATAANDBSS-NEXT: 140004010 18300040 01000000 21300040 01000000 |
Hmm, that's odd. Does LLD end up doing this overall in real world cases (both as lld-link in MSVC mode, and in mingw mode?), or is there something else differing compared with this test? |
I think it's section alignment in the output PE file maybe? When I had a small .bss
.zero 8192 and |
This comment was marked as outdated.
This comment was marked as outdated.
Not too horribly ugly? |
f0bc2be
to
bae6a29
Compare
Hmm, that's not too bad indeed. Before going ahead and blessing that approach, I'd like a second opinion from someone else though - is the second commit here acceptable to @cjacek, @aganea, @rnk or others? As users already can specify the |
I wouldn't be opposed to that (and adding a separate test of that). |
I think that'd be the clearest move here in any case; that should get enough visibility on its own, that I'd be comfortable approving it if it doesn't get feedback from others within a couple of days. |
bae6a29
to
067e6c9
Compare
Rebased. |
Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the .data_cygwin_nocopy section, which is merged into .data outside the __data_start__ to __data_end__ range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
067e6c9
to
fcd7a03
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
…llvm#136180) Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
…llvm#136180) Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
…llvm#136180) Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
…llvm#136180) Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
…llvm#136180) Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
…llvm#136180) Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
…llvm#136180) Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
…llvm#136180) Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here. Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too. Signed-off-by: Jeremy Drake <[email protected]>
Cygwin requires these symbols for its fork emulation to know what data to copy into the child. GNU ld defines these symbols for MinGW targets also, so do the same here.
Cygwin also has the `.data_cygwin_nocopy` section, which is merged into `.data` outside the `__data_start__` to `__data_end__` range. This excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL itself (which requires a custom linker script to link, that's not supported by LLD), but the section is included in GNU ld's default linker script so handle it here too.