Merge remote-tracking branch 'refs/remotes/origin/CPP' into CPP

at0m741 · at0m741 · commit fda7da139299 · 2025-11-02T16:24:47.000+01:00
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,38 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser [e.g. chrome, safari]
+ - Version [e.g. 22]
+
+**Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser [e.g. stock browser, safari]
+ - Version [e.g. 22]
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -9,6 +9,7 @@ option(DEBUG "Build with debug symbols" OFF)
 option(BUILD_PLUGINS "Build Clang/LLVM plugins" ON)
 option(BUILD_TESTS "Build test executables" ON)
 option(BUILD_PYBIND "Build Python bindings" OFF)
+option(USE_CUDA "Enable CUDA kernels (requires nvcc)" OFF)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -39,15 +40,12 @@ if (NOT GPU_NAME STREQUAL "none")
   message(STATUS "Detected NVIDIA GPU: ${GPU_NAME}")
   add_compile_definitions(TENSORIUM_GPU_PRESENT)
 else()
-  message(WARNING "⚠️ Aucun GPU NVIDIA détecté (ou nvidia-smi absent)")
+  message(WARNING "No NVIDIA GPU detected or nvidia-smi missing")
 endif()
 
-
-option(USE_CUDA "Enable CUDA kernels (requires nvcc)" OFF)
-
 if (USE_CUDA)
   if (NOT CUDAToolkit_FOUND)
-    message(FATAL_ERROR "USE_CUDA=ON mais aucun CUDA toolkit détecté ⚠️")
+    message(FATAL_ERROR "USE_CUDA=ON but CUDA toolkit not found")
   endif()
 
   message(STATUS "CUDA toolkit found at: ${CUDAToolkit_ROOT}")
@@ -63,18 +61,17 @@ if (USE_CUDA)
 
   set(CMAKE_CUDA_STANDARD 17)
   set(CMAKE_CUDA_STANDARD_REQUIRED ON)
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -Xcompiler=-fPIC")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math -lineinfo -Wno-deprecated-gpu-targets")
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -Xcompiler=-fPIC --use_fast_math -lineinfo -Wno-deprecated-gpu-targets")
 
   include_directories(${CUDAToolkit_INCLUDE_DIRS})
   link_directories(${CUDAToolkit_LIBRARY_DIR})
 
   message(STATUS "→ CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
-
 else()
   message(STATUS "CUDA support disabled (USE_CUDA=OFF)")
   add_compile_definitions(TENSORIUM_NO_CUDA)
 endif()
+
 include_directories(${CMAKE_SOURCE_DIR}/Includes)
 
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "amd64")
@@ -88,7 +85,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "am
     set(CMAKE_CXX_FLAGS "${BASE_FLAGS} ${AVX2_FLAGS}")
   endif()
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
-  message(STATUS "Configuring for Apple Silicon ARM64: disabling AVX flags")
+  message(STATUS "Configuring for ARM64: disabling AVX flags")
   set(CMAKE_CXX_FLAGS "-O3 -mcpu=apple-m1 -Wno-ignored-attributes")
 else()
   message(WARNING "Unknown architecture (${CMAKE_SYSTEM_PROCESSOR}); using generic optimization flags.")
diff --git a/README.md b/README.md
@@ -1,13 +1,16 @@
 ![Nouveau projet](https://github.com/user-attachments/assets/5f75f1f9-999d-410b-971e-ba3bd5e8b5e9)
 # Tensorium_lib
-### !!!DISCLAMER!!! 
+> !!!DISCLAIMER!!! 
 Tensorium_lib is still in the early development phase, and many of its features work, but I'm not yet convinced of the solidity of some of them (especially the tensor manipulations).
 The python binding is usable without any other python librairy, but I'm still working on it to make it all clean and usable using a simple pip3 install (see the Jupiter Notebook).
 
 **Tensorium_lib** is a high-performance scientific C++ library designed for demanding computational domains such as **numerical relativity**, **machine learning (ML)**, **deep learning (DL)** and general **scientific simulations**.
 
-Here is the full documentation : https://tensoriumcore.github.io/Tensorium_lib/
+## Documentation 
 
+> The full documentation is available at: https://tensoriumcore.github.io/Tensorium_lib/
+
+## Highlight
 It provides a modern, extensible infrastructure for efficient vector, matrix, and tensor computations by leveraging:
 - **SIMD acceleration** (SSE, AVX2, AVX512),
 - **Multithreading** with OpenMP,
@@ -24,6 +27,70 @@ This library is built with the goal of empowering projects that require both spe
 - Fast manipulation of large scientific datasets and image matrices (not atm),
 - Research and education projects needing intuitive yet high-performance numerical tools.
 
+##  Requirements
+
+>  **Recommended:** build and use with **LLVM/Clang** for maximum performance.
+
+###  Core Dependencies
+- **C++17/20 compiler** with `AVX2` / `FMA` support  
+  → `AVX512` is automatically detected and enabled if available  
+  → Recommended: **Clang ≥ 17** or **LLVM ≥ 20**  
+- **OpenMP** (`-fopenmp`)
+- **MPI** (for distributed parallelism)
+- **libmemkind-dev** *(required only for Intel Xeon Phi Knights Landing CPUs)*
+- **CMake ≥ 3.16**
+- **Python ≥ 3.10** (for Python bindings)
+- **pybind11**  
+  - Arch Linux: `sudo pacman -S python-pybind11`  
+  - Other: `pip install pybind11 --user`
+- **OpenBLAS** *(optional)* — used for benchmarking against BLAS kernels
+
+---
+## Build Instructions
+
+###  Recommended LLVM/Clang Toolchain
+
+If you want the best performance, use **LLVM/Clang 20+**.
+
+### Install LLVM/Clang (example for Linux)
+
+```bash
+# Clone the official LLVM project
+git clone https://github.com/llvm/llvm-project.git
+cd llvm-project
+mkdir llvm-build-release && cd llvm-build-release
+
+# Configure the build
+cmake -G Ninja ../llvm \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DLLVM_ENABLE_PROJECTS="clang;mlir;lld;lldb;openmp" \
+  -DLLVM_TARGETS_TO_BUILD="X86;AArch64;NVPTX" \
+  -DLLVM_ENABLE_RTTI=ON \
+  -DCMAKE_INSTALL_PREFIX=/opt/llvm-20
+
+# Build & install
+ninja -j$(nproc)
+sudo ninja install
+```
+Then you can compile Tensorium_lib. If you want to use it in your own projects, simply change the Test rule to Srcs (or another name) and set the recommended options in the `CMakeLists.txt` file in the `Tests` folder,
+or create a src folder and add a src rule for it,
+as shown below:
+```cmake
+# Inside the main CMakeLists.txt
+if(BUILD_SRCS)
+  add_subdirectory(SRCS)
+endif()
+```
+### Build the lib
+
+```bash
+git clone https://github.com/TensoriumCore/Tensorium_lib.git && cd Tensorium_lib
+mkdir build && cd build
+cmake ..   # add options if you need them (documentation is coming soon)
+make -j
+```
+The Python module will be created as a .so file in the pybuild/ directory.
+
 ## Highlights
 
 - Optimized `Tensor`, `Vector` and `Matrix` classes with aligned memory
@@ -45,43 +112,8 @@ This library is built with the goal of empowering projects that require both spe
 - Some (several) optimizations
 - Plug Tensorium_MLIR and externalize Compiler plugins (subdependencies)
 - ARM support 
-## Build Instructions
 
-### Requirements
-- !!! USE CLANG/LLVM if you want to use the max performances of this lib !!!
-- C++17/20 compiler with AVX2/FMA support or AVX512 if avalaible on your plateform (Intel compilers will be added later)
-- fopenmp
-- MPI
-- libmemkind-dev (if you are using Xeon Phi knight landing CPU)
-- CMake ≥ 3.16
-- Python ≥ 3.10 (for Python bindings)
-- `pybind11` installed (`pacman -S python-pybind11` on Arch, or `pip install pybind11 --user`)
-- OpenBLAS (optional, for benchmarking with BLAS)
 
-## Build over Nix for pythton binding
-
-```bash
-./build_linux.sh && pip install --user -e .
-```
-if you are on Macos :
-```bash
-nix --extra-experimental-features 'nix-command flakes' develop && ./build_macos && pip install --user -e .
-```
-
-Then you can use it as the .ipynb show
-### Build C++ only for special targets and options
-
-```bash
-make                # Default AVX2
-make help	    # Show differents compile options 
-make AVX512=true    # AVX512
-make USE_KNL=true   # MCDRAM Memkind HBW (Xeon phi KNL)
-make DEBUG=true     # debug symbols
-make VERBOSE=true   # VERBOSE log
-make benchmark      # BLAS vs Tensorium mat_mult benchmark
-```
-
-The Python module will be created as a .so file in the pybuild/ directory.
 ### Exemple using in C++
 ```cpp
 #include "Tensorium.hpp"
diff --git a/shell.nix b/shell.nix
@@ -0,0 +1,46 @@
+{ pkgs ? import <nixpkgs> {  # Nix development shell; pkgs defaults to <nixpkgs> imported with the config below
+    config = {
+      allowUnfree = true;  # allow unfree packages (NOTE(review): presumably required by vscode — confirm)
+    };
+  }
+}:
+pkgs.mkShell {  # shell environment assembled from the inputs and hook below
+  buildInputs = with pkgs; [  # packages taken directly from pkgs
+    vscode
+    gcc
+    openblas
+    openmpi
+    cloc
+    tree
+	doxygen 
+	graphviz 
+	bear
+    # Python 3.12 entries: a standalone interpreter and an environment with the packages listed
+    python312Full  # NOTE(review): listed in addition to the python312.withPackages environment — confirm both are needed
+    (python312.withPackages (ps: with ps; [
+      pip
+      virtualenv
+      ipykernel
+      notebook
+      jupyter-client
+      pyzmq
+      pybind11  # NOTE(review): the shellHook below pip-installs nanobind, not pybind11 — confirm which binding library is intended
+    ]))
+  ] ++ (with llvmPackages_19; [  # appended: packages taken from llvmPackages_19
+    mlir
+    clang
+    llvm
+    libclang
+    openmp
+  ]);
+  shellHook = ''
+    if [ ! -d .venv ]; then
+      echo "[+] Creating .venv..."
+      python3 -m venv .venv
+      source .venv/bin/activate
+      pip install nanobind
+    else
+      source .venv/bin/activate
+    fi
+  '';  # hook summary: creates and activates .venv (installing nanobind) when it is missing, otherwise only activates it
+}