Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8fb7a88

Browse files
authored
Reduce overhead of bindings requiring cuPythonInit() (NVIDIA#894)
* Reduce overhead of bindings requiring cuPythonInit() * Add changelog entry * Explicitly specify `inline`
1 parent 811cb93 commit 8fb7a88

File tree

4 files changed

+29
-9
lines changed

4 files changed

+29
-9
lines changed

cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -490,10 +490,8 @@ cdef bint __cuPythonInit = False
490490
ctypedef CUresult (*__cuGetProcAddress_v2_T)(const char*, void**, int, cuuint64_t, CUdriverProcAddressQueryResult*) except?CUDA_ERROR_NOT_FOUND nogil
491491
cdef __cuGetProcAddress_v2_T _F_cuGetProcAddress_v2 = NULL
492492

493-
cdef int cuPythonInit() except -1 nogil:
493+
cdef int _cuPythonInit() except -1 nogil:
494494
global __cuPythonInit
495-
if __cuPythonInit:
496-
return 0
497495

498496
cdef bint usePTDS
499497
cdef char libPath[260]
@@ -8883,6 +8881,14 @@ cdef int cuPythonInit() except -1 nogil:
88838881
__cuPythonInit = True
88848882
return 0
88858883

8884+
# Create a very small function to check whether we are init'ed, so the C
8885+
# compiler can inline it.
8886+
cdef inline int cuPythonInit() except -1 nogil:
8887+
if __cuPythonInit:
8888+
return 0
8889+
8890+
return _cuPythonInit()
8891+
88868892
{{if 'cuGetErrorString' in found_functions}}
88878893

88888894
cdef CUresult _cuGetErrorString(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil:

cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,8 @@ cdef bint __cuPythonInit = False
4040
{{if 'nvrtcGetPCHHeapSizeRequired' in found_functions}}cdef void *__nvrtcGetPCHHeapSizeRequired = NULL{{endif}}
4141
{{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}}
4242

43-
cdef int cuPythonInit() except -1 nogil:
43+
cdef int _cuPythonInit() except -1 nogil:
4444
global __cuPythonInit
45-
if __cuPythonInit:
46-
return 0
4745

4846
with gil, __symbol_lock:
4947
{{if 'Windows' == platform.system()}}
@@ -324,6 +322,14 @@ cdef int cuPythonInit() except -1 nogil:
324322
__cuPythonInit = True
325323
return 0
326324

325+
# Create a very small function to check whether we are init'ed, so the C
326+
# compiler can inline it.
327+
cdef inline int cuPythonInit() except -1 nogil:
328+
if __cuPythonInit:
329+
return 0
330+
331+
return _cuPythonInit()
332+
327333
{{if 'nvrtcGetErrorString' in found_functions}}
328334

329335
cdef const char* _nvrtcGetErrorString(nvrtcResult result) except ?NULL nogil:

cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx.in

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,22 @@ cimport cython
1010

1111
cdef bint __cudaPythonInit = False
1212
cdef bint __usePTDS = False
13-
cdef int cudaPythonInit() except -1 nogil:
13+
cdef int _cudaPythonInit() except -1 nogil:
1414
global __cudaPythonInit
1515
global __usePTDS
16-
if __cudaPythonInit:
17-
return __usePTDS
1816
with gil:
1917
__usePTDS = os.getenv('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', default=False)
2018
__cudaPythonInit = True
2119
return __usePTDS
2220

21+
# Create a very small function to check whether we are init'ed, so the C
22+
# compiler can inline it.
23+
cdef inline int cudaPythonInit() except -1 nogil:
24+
if __cudaPythonInit:
25+
return __usePTDS
26+
27+
return _cudaPythonInit()
28+
2329
{{if 'cudaDeviceReset' in found_functions}}
2430

2531
cdef cudaError_t _cudaDeviceReset() except ?cudaErrorCallRequiresNewerDriver nogil:

cuda_bindings/docs/source/release/13.X.Y-notes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ Highlights
1616
* Automatic CUDA library path detection based on ``CUDA_HOME``, eliminating the need to manually set ``LIBRARY_PATH`` environment variables for installation.
1717
* The ``[all]`` optional dependencies now use ``cuda-toolkit`` with appropriate extras instead of individual packages. The NVCC compiler is no longer automatically installed with ``pip install cuda-python[all]`` as it was previously included only to access the NVVM library, which now has its own dedicated wheel. Users who need the NVCC compiler should explicitly install it with ``pip install cuda-toolkit[nvcc]==X.Y`` with the appropriate version for their needs.
1818

19+
* The Python overhead of calling functions in CUDA bindings in ``driver``, ``runtime`` and ``nvrtc`` has been reduced by approximately 30%.
20+
1921

2022
Known issues
2123
------------

0 commit comments

Comments
 (0)