Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit cf77a69

Browse files
author
Jenkins
committed
Compute Library v25.04
1 parent ed7e1c3 commit cf77a69

File tree

30 files changed

+15984
-135
lines changed

30 files changed

+15984
-135
lines changed

Android.bp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,6 +1227,7 @@ cc_library_static {
12271227
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16/generic.cpp",
12281228
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32/generic.cpp",
12291229
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16fp32_mla_6x16/generic.cpp",
1230+
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16fp32fp16_mla_6x16/generic.cpp",
12301231
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp",
12311232
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24/generic.cpp",
12321233
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_6x16/generic.cpp",
@@ -1252,6 +1253,7 @@ cc_library_static {
12521253
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp",
12531254
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp",
12541255
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16fp32_mla_6x16/generic.cpp",
1256+
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16fp32fp16_mla_6x16/generic.cpp",
12551257
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp",
12561258
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp",
12571259
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp",
@@ -1340,6 +1342,7 @@ cc_library_static {
13401342
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp",
13411343
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp",
13421344
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16fp32_mla_6x4VL/generic.cpp",
1345+
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16fp32fp16_mla_6x4VL/generic.cpp",
13431346
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/a64fx.cpp",
13441347
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/generic.cpp",
13451348
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp",
@@ -1354,6 +1357,7 @@ cc_library_static {
13541357
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp",
13551358
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp",
13561359
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16fp32_mla_6x4VL/generic.cpp",
1360+
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16fp32fp16_mla_6x4VL/generic.cpp",
13571361
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/a64fx.cpp",
13581362
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp",
13591363
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/a64fx.cpp",

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2424

2525
project(
2626
ArmCompute
27-
VERSION 50.0.0
27+
VERSION 51.0.0
2828
DESCRIPTION
2929
"The Compute Library is a collection of low-level machine learning functions \
3030
optimized for Arm® Cortex®-A, Arm® Neoverse™ CPU and Arm® Mali™ GPU \

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
1010
</div>
1111

12-
# Compute Library ![](https://img.shields.io/badge/latest_release-25.03.1-green)
12+
# Compute Library ![](https://img.shields.io/badge/latest_release-25.04-green)
1313

1414

1515
The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse™ and Arm® Mali™ GPU architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
3737
<br>
3838

3939
## Documentation
40-
[![Documentation](https://img.shields.io/badge/documentation-25.03.1-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v25.03.1/index.xhtml)
40+
[![Documentation](https://img.shields.io/badge/documentation-25.04-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v25.04/index.xhtml)
4141

4242
> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
4343
@@ -50,22 +50,22 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
5050

5151
| Platform | Operating System | Release archive (Download) |
5252
| -------------- | ---------------- | -------------------------- |
53-
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-armv7a-cpu-bin.tar.gz) |
54-
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-aarch64-cpu-bin.tar.gz) |
55-
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
56-
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
53+
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-armv7a-cpu-bin.tar.gz) |
54+
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-aarch64-cpu-bin.tar.gz) |
55+
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-aarch64-cpu-gpu-bin.tar.gz) |
56+
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-aarch64-cpu-gpu-bin.tar.gz) |
5757

5858
<br>
5959

6060
| Architecture | Operating System | Release archive (Download) |
6161
| ------------ | ---------------- | -------------------------- |
62-
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-armv7a-cpu-gpu-bin.tar.gz) |
63-
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-android-aarch64-cpu-gpu-bin.tar.gz) |
64-
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.03.1/arm_compute-v25.03.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
62+
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-armv7a-cpu-gpu-bin.tar.gz) |
63+
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-android-aarch64-cpu-gpu-bin.tar.gz) |
64+
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v25.04/arm_compute-v25.04-linux-aarch64-cpu-gpu-bin.tar.gz) |
6565

6666
<br>
6767

68-
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v25.03.1-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v25.03.1)
68+
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v25.04-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v25.04)
6969

7070
Pre-built binaries are generated with the following security / good coding practices related flags:
7171
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
@@ -108,13 +108,13 @@ Pre-build binaries are generated with the following security / good coding pract
108108

109109
## Experimental builds
110110

111-
**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v25.03.1/how_to_build.xhtml) for more details.
111+
**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v25.04/how_to_build.xhtml) for more details.
112112

113113
<br>
114114

115115
## How to contribute
116116

117-
Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v25.03.1/contribution_guidelines.xhtml).
117+
Contributions to the Compute Library are more than welcome. If you are interested in contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v25.04/contribution_guidelines.xhtml).
118118

119119
### Developer Certificate of Origin (DCO)
120120
Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)

SConscript

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ import codecs
3333
import platform
3434
import SCons
3535

36-
VERSION = "v25.03.1"
37-
LIBRARY_VERSION_MAJOR = 50
36+
VERSION = "v25.04"
37+
LIBRARY_VERSION_MAJOR = 51
3838
LIBRARY_VERSION_MINOR = 0
3939
LIBRARY_VERSION_PATCH = 0
4040
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)

arm_compute/function_info/GEMMInfo.h

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ class GEMMInfo
105105
_activation_info(),
106106
_fixed_format(false),
107107
_weight_format(arm_compute::WeightFormat::UNSPECIFIED),
108-
_accumulate(false)
108+
_accumulate(false),
109+
_use_fp32_acc(false)
109110
{
110111
}
111112
/** Constructor
@@ -127,6 +128,7 @@ class GEMMInfo
127128
* @param[in] weight_format (Optional) arm_gemm:WeightFormat enumeration requested by the user. Default is arm_compute::WeightFormat::UNSPECIFIED.
128129
* @param[in] pretranspose_B (Optional) Pretranspose matrix B (transposition of its lowest 2 dimensions), in addition to and before, any further transformations of B
129130
* @param[in] accumulate (Optional) Whether to accumulate in destination or not
131+
* @param[in] use_fp32_acc (Optional) Whether to use fp32 accumulation in fp16 matmul (applicable to fp16 matmul only, ignored in other configurations)
130132
*/
131133
GEMMInfo(bool is_a_reshaped,
132134
bool is_b_reshaped,
@@ -142,7 +144,8 @@ class GEMMInfo
142144
bool fixed_format = false,
143145
arm_compute::WeightFormat weight_format = arm_compute::WeightFormat::UNSPECIFIED,
144146
bool pretranspose_B = false,
145-
bool accumulate = false) noexcept
147+
bool accumulate = false,
148+
bool use_fp32_acc = false) noexcept
146149
: _is_a_reshaped(is_a_reshaped),
147150
_is_b_reshaped(is_b_reshaped),
148151
_reshape_b_only_on_first_run(reshape_b_only_on_first_run),
@@ -158,7 +161,8 @@ class GEMMInfo
158161
_activation_info(activation_info),
159162
_fixed_format(fixed_format),
160163
_weight_format(weight_format),
161-
_accumulate(accumulate)
164+
_accumulate(accumulate),
165+
_use_fp32_acc(use_fp32_acc)
162166
{
163167
}
164168
/** Flag which specifies if the matrix A has been reshaped
@@ -342,6 +346,10 @@ class GEMMInfo
342346
_accumulate = accumulate;
343347
}
344348

349+
/** Weight format to be used
350+
*
351+
* @return The selected weight format.
352+
*/
345353
arm_compute::WeightFormat weight_format() const
346354
{
347355
return _weight_format;
@@ -354,6 +362,22 @@ class GEMMInfo
354362
{
355363
_weight_format = weight_format;
356364
}
365+
/** Flag which specifies if the GEMM operation is running in f16 matmul with f32 accumulation.
366+
*
367+
* @return True if the GEMM operation is running in f16 matmul with f32 accumulation else false.
368+
*/
369+
bool use_fp32_acc() const
370+
{
371+
return _use_fp32_acc;
372+
}
373+
/** Set use_fp32_acc flag
374+
*
375+
* @param[in] use_fp32_acc set whether or not to use f32 accumulation in f16 matmul
376+
*/
377+
void set_use_fp32_acc(bool use_fp32_acc)
378+
{
379+
_use_fp32_acc = use_fp32_acc;
380+
}
357381

358382
private:
359383
bool _is_a_reshaped;
@@ -372,6 +396,7 @@ class GEMMInfo
372396
bool _fixed_format;
373397
arm_compute::WeightFormat _weight_format;
374398
bool _accumulate;
399+
bool _use_fp32_acc;
375400
};
376401
} //namespace arm_compute
377402
#endif // ACL_ARM_COMPUTE_FUNCTION_INFO_GEMMINFO_H

docs/Doxyfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ PROJECT_NAME = "Compute Library"
6060
# could be handy for archiving the generated documentation or if some version
6161
# control system is used.
6262

63-
PROJECT_NUMBER = 25.03.1
63+
PROJECT_NUMBER = 25.04
6464

6565
# Using the PROJECT_BRIEF tag one can provide an optional one line description
6666
# for a project that appears at the top of each page and should give viewer a

examples/neon_sgemm.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ class NESGEMMExample : public Example
9595
auto mode_opt = parser.add_option<utils::SimpleOption<std::string>>("mode", "static");
9696
mode_opt->set_help("GEMM mode. Allowed values: static, dynamic. Default value: static");
9797

98+
auto threads_ops = parser.add_option<utils::SimpleOption<int>>("threads", 0);
99+
threads_ops->set_help(
100+
"Number of threads to use. When 0 or not present - one thread per CPU core will be used.");
101+
98102
parser.parse(argc, argv);
99103

100104
if (help_opt->is_set() && help_opt->value())
@@ -253,6 +257,8 @@ class NESGEMMExample : public Example
253257
}
254258
}
255259

260+
Scheduler::get().set_num_threads(threads_ops->value());
261+
256262
// Dummy run for CLTuner
257263
sgemm.run();
258264

filelist.json

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1742,7 +1742,10 @@
17421742
"src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp",
17431743
"src/core/NEON/kernels/arm_gemm/gemm_fp16fp32.cpp",
17441744
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16fp32_mla_6x16/generic.cpp",
1745-
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16fp32_mla_6x16/generic.cpp"
1745+
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16fp32fp16_mla_6x16/generic.cpp",
1746+
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16fp32_mla_6x16/generic.cpp",
1747+
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16fp32fp16_mla_6x16/generic.cpp"
1748+
17461749
],
17471750
"estate32": [
17481751
"src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a53.cpp",
@@ -1808,7 +1811,9 @@
18081811
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp",
18091812
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL/generic.cpp",
18101813
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16fp32_mla_6x4VL/generic.cpp",
1814+
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16fp32fp16_mla_6x4VL/generic.cpp",
18111815
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16fp32_mla_6x4VL/generic.cpp",
1816+
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16fp32fp16_mla_6x4VL/generic.cpp",
18121817
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp",
18131818
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL/generic.cpp",
18141819
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp",

0 commit comments

Comments
 (0)