Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit fda7da1

Browse files
committed
Merge remote-tracking branch 'refs/remotes/origin/CPP' into CPP
2 parents afe11aa + c956b8c commit fda7da1

File tree

5 files changed

+179
-46
lines changed

5 files changed

+179
-46
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
---
2+
name: Bug report
3+
about: Create a report to help us improve
4+
title: ''
5+
labels: ''
6+
assignees: ''
7+
8+
---
9+
10+
**Describe the bug**
11+
A clear and concise description of what the bug is.
12+
13+
**To Reproduce**
14+
Steps to reproduce the behavior:
15+
1. Go to '...'
16+
2. Click on '....'
17+
3. Scroll down to '....'
18+
4. See error
19+
20+
**Expected behavior**
21+
A clear and concise description of what you expected to happen.
22+
23+
**Screenshots**
24+
If applicable, add screenshots to help explain your problem.
25+
26+
**Desktop (please complete the following information):**
27+
- OS: [e.g. iOS]
28+
- Browser [e.g. chrome, safari]
29+
- Version [e.g. 22]
30+
31+
**Smartphone (please complete the following information):**
32+
- Device: [e.g. iPhone6]
33+
- OS: [e.g. iOS8.1]
34+
- Browser [e.g. stock browser, safari]
35+
- Version [e.g. 22]
36+
37+
**Additional context**
38+
Add any other context about the problem here.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
name: Feature request
3+
about: Suggest an idea for this project
4+
title: ''
5+
labels: ''
6+
assignees: ''
7+
8+
---
9+
10+
**Is your feature request related to a problem? Please describe.**
11+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12+
13+
**Describe the solution you'd like**
14+
A clear and concise description of what you want to happen.
15+
16+
**Describe alternatives you've considered**
17+
A clear and concise description of any alternative solutions or features you've considered.
18+
19+
**Additional context**
20+
Add any other context or screenshots about the feature request here.

CMakeLists.txt

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ option(DEBUG "Build with debug symbols" OFF)
99
option(BUILD_PLUGINS "Build Clang/LLVM plugins" ON)
1010
option(BUILD_TESTS "Build test executables" ON)
1111
option(BUILD_PYBIND "Build Python bindings" OFF)
12+
option(USE_CUDA "Enable CUDA kernels (requires nvcc)" OFF)
1213

1314
set(CMAKE_CXX_STANDARD 17)
1415
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -39,15 +40,12 @@ if (NOT GPU_NAME STREQUAL "none")
3940
message(STATUS "Detected NVIDIA GPU: ${GPU_NAME}")
4041
add_compile_definitions(TENSORIUM_GPU_PRESENT)
4142
else()
42-
message(WARNING "⚠️ Aucun GPU NVIDIA détecté (ou nvidia-smi absent)")
43+
message(WARNING "No NVIDIA GPU detected or nvidia-smi missing")
4344
endif()
4445

45-
46-
option(USE_CUDA "Enable CUDA kernels (requires nvcc)" OFF)
47-
4846
if (USE_CUDA)
4947
if (NOT CUDAToolkit_FOUND)
50-
message(FATAL_ERROR "USE_CUDA=ON mais aucun CUDA toolkit détecté ⚠️")
48+
message(FATAL_ERROR "USE_CUDA=ON but CUDA toolkit not found")
5149
endif()
5250

5351
message(STATUS "CUDA toolkit found at: ${CUDAToolkit_ROOT}")
@@ -63,18 +61,17 @@ if (USE_CUDA)
6361

6462
set(CMAKE_CUDA_STANDARD 17)
6563
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
66-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -Xcompiler=-fPIC")
67-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math -lineinfo -Wno-deprecated-gpu-targets")
64+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -Xcompiler=-fPIC --use_fast_math -lineinfo -Wno-deprecated-gpu-targets")
6865

6966
include_directories(${CUDAToolkit_INCLUDE_DIRS})
7067
link_directories(${CUDAToolkit_LIBRARY_DIR})
7168

7269
message(STATUS "→ CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
73-
7470
else()
7571
message(STATUS "CUDA support disabled (USE_CUDA=OFF)")
7672
add_compile_definitions(TENSORIUM_NO_CUDA)
7773
endif()
74+
7875
include_directories(${CMAKE_SOURCE_DIR}/Includes)
7976

8077
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "amd64")
@@ -88,7 +85,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "am
8885
set(CMAKE_CXX_FLAGS "${BASE_FLAGS} ${AVX2_FLAGS}")
8986
endif()
9087
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
91-
message(STATUS "Configuring for Apple Silicon ARM64: disabling AVX flags")
88+
message(STATUS "Configuring for ARM64: disabling AVX flags")
9289
set(CMAKE_CXX_FLAGS "-O3 -mcpu=apple-m1 -Wno-ignored-attributes")
9390
else()
9491
message(WARNING "Unknown architecture (${CMAKE_SYSTEM_PROCESSOR}); using generic optimization flags.")

README.md

Lines changed: 69 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
![Nouveau projet](https://github.com/user-attachments/assets/5f75f1f9-999d-410b-971e-ba3bd5e8b5e9)
22
# Tensorium_lib
3-
### !!!DISCLAMER!!!
3+
> !!!DISCLAIMER!!!
44
Tensorium_lib is still in the early development phase, and many of its features work, but I'm not yet convinced of the solidity of some of them (especially the tensor manipulations).
55
The Python binding is usable without any other Python library, but I'm still working on it to make it clean and installable with a simple `pip3 install` (see the Jupyter Notebook).
66

77
**Tensorium_lib** is a high-performance scientific C++ library designed for demanding computational domains such as **numerical relativity**, **machine learning (ML)**, **deep learning (DL)** and general **scientific simulations**.
88

9-
Here is the full documentation : https://tensoriumcore.github.io/Tensorium_lib/
9+
## Documentation
1010

11+
> Here is the full documentation : https://tensoriumcore.github.io/Tensorium_lib/
12+
13+
## Highlight
1114
It provides a modern, extensible infrastructure for efficient vector, matrix, and tensor computations by leveraging:
1215
- **SIMD acceleration** (SSE, AVX2, AVX512),
1316
- **Multithreading** with OpenMP,
@@ -24,6 +27,70 @@ This library is built with the goal of empowering projects that require both spe
2427
- Fast manipulation of large scientific datasets and image matrices (not atm),
2528
- Research and education projects needing intuitive yet high-performance numerical tools.
2629

30+
## Requirements
31+
32+
> **Recommended:** build and use with **LLVM/Clang** for maximum performance.
33+
34+
### Core Dependencies
35+
- **C++17/20 compiler** with `AVX2` / `FMA` support
36+
`AVX512` is automatically detected and enabled if available
37+
→ Recommended: **Clang ≥ 17** or **LLVM ≥ 20**
38+
- **OpenMP** (`-fopenmp`)
39+
- **MPI** (for distributed parallelism)
40+
- **libmemkind-dev** *(required only for Intel Xeon Phi Knights Landing CPUs)*
41+
- **CMake ≥ 3.16**
42+
- **Python ≥ 3.10** (for Python bindings)
43+
- **pybind11**
44+
- Arch Linux: `sudo pacman -S python-pybind11`
45+
- Other: `pip install pybind11 --user`
46+
- **OpenBLAS** *(optional)* — used for benchmarking against BLAS kernels
47+
48+
---
49+
## Build Instructions
50+
51+
### Recommended LLVM/Clang Toolchain
52+
53+
If you want the best performance, use **LLVM/Clang 20+**.
54+
55+
### Install LLVM/Clang (example for Linux)
56+
57+
```bash
58+
# Clone the official LLVM project
59+
git clone https://github.com/llvm/llvm-project.git
60+
cd llvm-project
61+
mkdir llvm-build-release && cd llvm-build-release
62+
63+
# Configure the build
64+
cmake -G Ninja ../llvm \
65+
-DCMAKE_BUILD_TYPE=Release \
66+
-DLLVM_ENABLE_PROJECTS="clang;mlir;lld;lldb;openmp" \
67+
-DLLVM_TARGETS_TO_BUILD="X86;AArch64;NVPTX" \
68+
-DLLVM_ENABLE_RTTI=ON \
69+
-DCMAKE_INSTALL_PREFIX=/opt/llvm-20
70+
71+
# Build & install
72+
ninja -j$(nproc)
73+
sudo ninja install
74+
```
75+
Then you can compile Tensorium_lib. If you want to use it in your own projects, simply change the Test rule to Srcs (or another name) and set the recommended options in the CMakeLists.txt file in the `
76+
Tests` folder, or add a src rule and create a src folder :
77+
then
78+
```cmake
79+
###inside the main CmakeLists.txt
80+
if(BUILD_SRCS)
81+
add_subdirectory(SRCS)
82+
endif()
83+
```
84+
### Build the lib
85+
86+
```bash
87+
git clone https://github.com/TensoriumCore/Tensorium_lib.git && cd Tensorium_lib
88+
mkdir build && cd build
89+
cmake ..   # add options if needed (documentation is coming soon)
90+
make -j
91+
```
92+
The Python module will be created as a .so file in the pybuild/ directory.
93+
2794
## Highlights
2895
2996
- Optimized `Tensor`, `Vector` and `Matrix` classes with aligned memory
@@ -45,43 +112,8 @@ This library is built with the goal of empowering projects that require both spe
45112
- Some (several) optimizations
46113
- Plug Tensorium_MLIR and externalize Compiler plugins (subdependencies)
47114
- ARM support
48-
## Build Instructions
49115
50-
### Requirements
51-
- !!! USE CLANG/LLVM if you want to use the max performances of this lib !!!
52-
- C++17/20 compiler with AVX2/FMA support or AVX512 if avalaible on your plateform (Intel compilers will be added later)
53-
- fopenmp
54-
- MPI
55-
- libmemkind-dev (if you are using Xeon Phi knight landing CPU)
56-
- CMake ≥ 3.16
57-
- Python ≥ 3.10 (for Python bindings)
58-
- `pybind11` installed (`pacman -S python-pybind11` on Arch, or `pip install pybind11 --user`)
59-
- OpenBLAS (optional, for benchmarking with BLAS)
60116
61-
## Build over Nix for pythton binding
62-
63-
```bash
64-
./build_linux.sh && pip install --user -e .
65-
```
66-
if you are on Macos :
67-
```bash
68-
nix --extra-experimental-features 'nix-command flakes' develop && ./build_macos && pip install --user -e .
69-
```
70-
71-
Then you can use it as the .ipynb show
72-
### Build C++ only for special targets and options
73-
74-
```bash
75-
make # Default AVX2
76-
make help # Show differents compile options
77-
make AVX512=true # AVX512
78-
make USE_KNL=true # MCDRAM Memkind HBW (Xeon phi KNL)
79-
make DEBUG=true # debug symbols
80-
make VERBOSE=true # VERBOSE log
81-
make benchmark # BLAS vs Tensorium mat_mult benchmark
82-
```
83-
84-
The Python module will be created as a .so file in the pybuild/ directory.
85117
### Example usage in C++
86118
```cpp
87119
#include "Tensorium.hpp"

shell.nix

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{ pkgs ? import <nixpkgs> {
2+
config = {
3+
allowUnfree = true;
4+
};
5+
}
6+
}:
7+
pkgs.mkShell {
8+
buildInputs = with pkgs; [
9+
vscode
10+
gcc
11+
openblas
12+
openmpi
13+
cloc
14+
tree
15+
doxygen
16+
graphviz
17+
bear
18+
19+
python312Full
20+
(python312.withPackages (ps: with ps; [
21+
pip
22+
virtualenv
23+
ipykernel
24+
notebook
25+
jupyter-client
26+
pyzmq
27+
pybind11
28+
]))
29+
] ++ (with llvmPackages_19; [
30+
mlir
31+
clang
32+
llvm
33+
libclang
34+
openmp
35+
]);
36+
shellHook = ''
37+
if [ ! -d .venv ]; then
38+
echo "[+] Creating .venv..."
39+
python3 -m venv .venv
40+
source .venv/bin/activate
41+
pip install nanobind
42+
else
43+
source .venv/bin/activate
44+
fi
45+
'';
46+
}

0 commit comments

Comments
 (0)