-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[PGO][Offload] Update PGO GPU tests #132262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[PGO][Offload] Update PGO GPU tests #132262
Conversation
65aae01
to
10cb0f6
Compare
@llvm/pr-subscribers-offload Author: Ethan Luis McDonough (EthanLuisMcDonough) Changes: This pull request aims to improve the reliability of pgo1.c and pgo2.c. Full diff: https://github.com/llvm/llvm-project/pull/132262.diff 2 Files Affected:
diff --git a/offload/test/offloading/gpupgo/pgo1.c b/offload/test/offloading/gpupgo/pgo1.c
index c8011cbae83c0..4fb11bf702aba 100644
--- a/offload/test/offloading/gpupgo/pgo1.c
+++ b/offload/test/offloading/gpupgo/pgo1.c
@@ -1,5 +1,6 @@
// RUN: %libomptarget-compile-generic -fcreate-profile \
-// RUN: -Xarch_device -fprofile-generate
+// RUN: -Xarch_device -fprofile-generate \
+// RUN: -Xarch_device -fprofile-update=atomic
// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
@@ -7,7 +8,8 @@
// RUN: %fcheck-generic --check-prefix="LLVM-PGO"
// RUN: %libomptarget-compile-generic -fcreate-profile \
-// RUN: -Xarch_device -fprofile-instr-generate
+// RUN: -Xarch_device -fprofile-instr-generate \
+// RUN: -Xarch_device -fprofile-update=atomic
// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
@@ -23,10 +25,12 @@ int test2(int a) { return a * 2; }
int main() {
int m = 2;
#pragma omp target
- for (int i = 0; i < 10; i++) {
- m = test1(m);
- for (int j = 0; j < 2; j++) {
- m = test2(m);
+ {
+ for (int i = 0; i < 10; i++) {
+ m = test1(m);
+ for (int j = 0; j < 2; j++) {
+ m = test2(m);
+ }
}
}
}
@@ -34,7 +38,7 @@ int main() {
// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-PGO: Counters: 4
-// LLVM-PGO: Block counts: [20, 10, 2, 1]
+// LLVM-PGO: Block counts: [20, 10, {{.*}}, 1]
// LLVM-PGO-LABEL: test1:
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
@@ -53,14 +57,10 @@ int main() {
// LLVM-PGO-SAME: 3
// LLVM-PGO-LABEL: Maximum function count:
// LLVM-PGO-SAME: 20
-// LLVM-PGO-LABEL: Maximum internal block count:
-// LLVM-PGO-SAME: 10
// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
-// CLANG-PGO: Counters: 3
-// CLANG-PGO: Function count: 0
-// CLANG-PGO: Block counts: [11, 20]
+// CLANG-PGO: Block counts: [10, 20]
// CLANG-PGO-LABEL: test1:
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
@@ -78,7 +78,5 @@ int main() {
// CLANG-PGO-SAME: Front-end
// CLANG-PGO-LABEL: Functions shown:
// CLANG-PGO-SAME: 3
-// CLANG-PGO-LABEL: Maximum function count:
-// CLANG-PGO-SAME: 20
// CLANG-PGO-LABEL: Maximum internal block count:
// CLANG-PGO-SAME: 20
diff --git a/offload/test/offloading/gpupgo/pgo2.c b/offload/test/offloading/gpupgo/pgo2.c
index b75b0beaffdec..820a597bd5108 100644
--- a/offload/test/offloading/gpupgo/pgo2.c
+++ b/offload/test/offloading/gpupgo/pgo2.c
@@ -1,4 +1,5 @@
-// RUN: %libomptarget-compile-generic -fprofile-generate
+// RUN: %libomptarget-compile-generic -fprofile-generate \
+// RUN: -fprofile-update=atomic
// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
@@ -8,7 +9,8 @@
// RUN: %target_triple.%basename_t.llvm.profraw \
// RUN: | %fcheck-generic --check-prefix="LLVM-DEVICE"
-// RUN: %libomptarget-compile-generic -fprofile-instr-generate
+// RUN: %libomptarget-compile-generic -fprofile-instr-generate \
+// RUN: -fprofile-update=atomic
// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
@@ -18,7 +20,8 @@
// RUN: %target_triple.%basename_t.clang.profraw | \
// RUN: %fcheck-generic --check-prefix="CLANG-DEV"
-// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate
+// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \
+// RUN: -fprofile-update=atomic
// RUN: env LLVM_PROFILE_FILE=%basename_t.nogpu.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
@@ -27,7 +30,7 @@
// RUN: not test -e %target_triple.%basename_t.nogpu.profraw
// RUN: %libomptarget-compile-generic -Xarch_host -fprofile-generate \
-// RUN: -Xarch_device -fprofile-instr-generate
+// RUN: -Xarch_device -fprofile-instr-generate -fprofile-update=atomic
// RUN: env LLVM_PROFILE_FILE=%basename_t.hidf.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
@@ -38,7 +41,7 @@
// RUN: | %fcheck-generic --check-prefix="CLANG-DEV"
// RUN: %libomptarget-compile-generic -Xarch_device -fprofile-generate \
-// RUN: -Xarch_host -fprofile-instr-generate
+// RUN: -Xarch_host -fprofile-instr-generate -fprofile-update=atomic
// RUN: env LLVM_PROFILE_FILE=%basename_t.hfdi.profraw \
// RUN: %libomptarget-run-generic 2>&1
// RUN: llvm-profdata show --all-functions --counts \
@@ -59,8 +62,10 @@ int main() {
int device_var = 1;
#pragma omp target
- for (int i = 0; i < 10; i++) {
- device_var *= i;
+ {
+ for (int i = 0; i < 10; i++) {
+ device_var *= i;
+ }
}
}
@@ -78,7 +83,7 @@ int main() {
// LLVM-DEVICE-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// LLVM-DEVICE: Hash: {{0[xX][0-9a-fA-F]+}}
// LLVM-DEVICE: Counters: 3
-// LLVM-DEVICE: Block counts: [10, 2, 1]
+// LLVM-DEVICE: Block counts: [10, {{.*}}, 1]
// LLVM-DEVICE: Instrumentation level: IR
// CLANG-HOST-LABEL: main:
@@ -97,6 +102,5 @@ int main() {
// CLANG-DEV-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
// CLANG-DEV: Hash: {{0[xX][0-9a-fA-F]+}}
// CLANG-DEV: Counters: 2
-// CLANG-DEV: Function count: 0
-// CLANG-DEV: Block counts: [11]
+// CLANG-DEV: Block counts: [10]
// CLANG-DEV: Instrumentation level: Front-end
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Normal `pragma omp target` is a little weird since it executes with multiple threads but masks all of them off except one. It's somewhat possible that this would interact with something? Also, AFAIK, all aligned accesses on AMDGPU should be 'atomic', at least in terms of the output. If this lowers to fine-grained memory, maybe not.
I talked to @EthanLuisMcDonough but I want to repeat it here: We need to modify the existing (single-threaded!) tests to only look at the counters for user code. Those need to be stable w/o atomic accesses in a single-threaded environment. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume the remaining counters are for blocks that execute only after we have guarded against extra threads. The atomic tests look good. Maybe rename them into pgo_atomic_teams, pgo_atomic_threads.
This pull request aims to improve the reliability of pgo1.c and pgo2.c