#ifndef THC_CACHING_HOST_ALLOCATOR_INC
#define THC_CACHING_HOST_ALLOCATOR_INC

#include "THCGeneral.h"

//
// A caching allocator for CUDA host allocations (pinned memory).
//
// This provides a drop-in replacement for THCudaHostAllocator that re-uses
// freed pinned (page-locked) memory allocations. This avoids device
// synchronizations due to cudaFreeHost calls.
//
// To ensure correct behavior, THCCachingHostAllocator_recordEvent must be
// called any time a pointer from this allocator is used in a cudaMemcpyAsync
// call between host and device. The THC library implements this for storages
// and tensors in THCTensor_(copyAsyncCPU) and THCTensor_(copyAsyncCuda).
//
// Note that this allocator does not split larger allocations into smaller
// blocks, unlike the caching device allocator.
//
THC_API THAllocator THCCachingHostAllocator;

// Records an event in the specified stream. The allocation 'ptr' will not be
// re-used until the event has occurred.
//
//   ptr    - a pointer obtained from THCCachingHostAllocator.
//   stream - the CUDA stream in which the event is recorded.
//
// Returns a cudaError_t status code.
// NOTE(review): presumably cudaSuccess on success; the behavior for a 'ptr'
// that did not come from this allocator is not visible in this header --
// confirm against the implementation.
THC_API cudaError_t THCCachingHostAllocator_recordEvent(void *ptr, cudaStream_t stream);

// Releases cached pinned memory allocations via cudaFreeHost.
THC_API void THCCachingHostAllocator_emptyCache(void);

#endif
