Thanks to visit codestin.com
Credit goes to github.com

Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions include/mxnet/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -3153,6 +3153,29 @@ MXNET_DLL int MXEnginePushSyncND(EngineSyncFunc sync_func, void* func_param,
MXNET_DLL int MXCheckDynamicShapeOp(SymbolHandle sym_handle,
bool* has_dynamic_shape);

/*!
* \brief Push a new NVTX range. Requires building with CUDA and NVTX.
* The range stays open until it is closed with MXNVTXRangePop().
* \param name Name of the range, as a NUL-terminated C string.
* \param color Color used to display the range in the visual profiling tools.
* Encoded as 256*256*R + 256*G + B (i.e. 0xRRGGBB). The implementation ORs in
* 0xff000000, so the top byte of the value passed here does not matter.
* \return Status code; the Python bindings pass it to check_call().
* NOTE(review): presumably 0 on success and non-zero on failure like the
* rest of this C API - confirm.
*/
MXNET_DLL int MXNVTXRangePush(const char * name, mx_uint color);

/*!
* \brief End the NVTX range. Requires building with CUDA and NVTX.
* Counterpart of MXNVTXRangePush(); implemented with nvtxRangePop().
* \return Status code; the Python bindings pass it to check_call().
* NOTE(review): presumably 0 on success and non-zero on failure - confirm.
*/
MXNET_DLL int MXNVTXRangePop();

/*!
* \brief Start CUDA profiling session. Requires building with CUDA and NVTX.
* Implemented with cudaProfilerStart().
* \return Status code; the Python bindings pass it to check_call().
* NOTE(review): presumably 0 on success and non-zero on failure - confirm.
*/
MXNET_DLL int MXCUDAProfilerStart();

/*!
* \brief End CUDA profiling session. Requires building with CUDA and NVTX.
* Implemented with cudaProfilerStop().
* \return Status code; the Python bindings pass it to check_call().
* NOTE(review): presumably 0 on success and non-zero on failure - confirm.
*/
MXNET_DLL int MXCUDAProfilerStop();

#ifdef __cplusplus
}
#endif // __cplusplus
Expand Down
28 changes: 28 additions & 0 deletions python/mxnet/cuda/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Provides python interface to CUDA-related functions of the MXNet library"""

from ..base import _LIB, check_call
from . import nvtx

def cuda_profiler_start():
    """Start the CUDA profiler.

    Calls ``MXCUDAProfilerStart`` in the MXNet library and hands the returned
    status code to ``check_call``.
    """
    status = _LIB.MXCUDAProfilerStart()
    check_call(status)

def cuda_profiler_stop():
    """Stop the CUDA profiler.

    Calls ``MXCUDAProfilerStop`` in the MXNet library and hands the returned
    status code to ``check_call``.
    """
    status = _LIB.MXCUDAProfilerStop()
    check_call(status)
53 changes: 53 additions & 0 deletions python/mxnet/cuda/nvtx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Utilities for NVTX usage in MXNet"""

from ..base import _LIB, mx_uint, c_str, check_call

# Palette of colors, written as 0xRRGGBB. This is the 256*256*R + 256*G + B
# encoding that the MXNVTXRangePush C API documents for its color argument,
# and the values mirror the kRed..kGreen1 constants of the C++ nvtx class.
RED = 0xFF0000
GREEN = 0x00FF00
BLUE = 0x0000FF
YELLOW = 0xB58900
ORANGE = 0xCB4B16
RED1 = 0xDC322F
MAGENTA = 0xD33682
VIOLET = 0x6C71C4
BLUE1 = 0x268BD2
CYAN = 0x2AA198
GREEN1 = 0x859900

def range_push(name, color=ORANGE):
    """Open a new named NVTX range.

    Parameters
    ----------
    name
        Name of the range; converted with ``c_str`` before the call.
    color
        Display color of the range, wrapped in ``mx_uint``. Defaults to ORANGE.
    """
    c_name = c_str(name)
    c_color = mx_uint(color)
    check_call(_LIB.MXNVTXRangePush(c_name, c_color))

def range_pop():
    """Close an NVTX range opened with ``range_push``.

    Calls ``MXNVTXRangePop`` in the MXNet library and hands the returned
    status code to ``check_call``.
    """
    status = _LIB.MXNVTXRangePop()
    check_call(status)

class range:
    """Context manager that wraps a block of code in a named NVTX range.

    ``range_push`` is called when the ``with`` block is entered and
    ``range_pop`` when it is left, including when the body raises.

    Note that, inside this module, the class name shadows the builtin
    ``range`` from this point on.

    Parameters
    ----------
    name
        Name of the range, forwarded to ``range_push``.
    color
        Display color of the range, forwarded to ``range_push``.
        Defaults to ORANGE.
    """
    def __init__(self, name, color=ORANGE):
        self.name = name
        self.color = color

    def __enter__(self):
        """Open the range and return this object."""
        range_push(self.name, self.color)
        # Returning the instance makes ``with range(...) as r`` bind a usable
        # object instead of None; callers that ignore the value are unaffected.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the range; nothing is returned, so exceptions propagate."""
        range_pop()
45 changes: 45 additions & 0 deletions src/c_api/c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@
#include <x86intrin.h>
#endif

#if MXNET_USE_CUDA
#include <cuda_profiler_api.h>
#endif
#include "../common/cuda/nvtx.h"

using namespace mxnet;

// Internal function to get the information
Expand Down Expand Up @@ -3939,3 +3944,43 @@ int MXShallowCopyNDArray(NDArrayHandle src_handle, NDArrayHandle* out) {
*out = ret;
API_END_HANDLE_ERROR(delete ret);
}

// C API entry point: opens an NVTX range called `name`, drawn with `color`.
// Without CUDA + NVTX support compiled in, the call fails with a fatal log message.
int MXNVTXRangePush(const char * name, mx_uint color) {
  API_BEGIN();
#if MXNET_USE_CUDA && MXNET_USE_NVTX
  // gpuRangeStart() takes a std::string, and building one from a null pointer
  // is undefined behavior. Reject it up front, reporting the failure the same
  // way the #else branch reports a build without NVTX.
  if (name == nullptr) {
    LOG(FATAL) << "MXNVTXRangePush: name must not be null.";
  }
  mxnet::common::cuda::nvtx::gpuRangeStart(color, name);
#else
  LOG(FATAL) << "Compile with USE_CUDA=1 and USE_NVTX=1 to have NVTX support.";
#endif
  API_END();
}

// C API entry point: closes an NVTX range via nvtx::gpuRangeStop().
// Builds lacking CUDA or NVTX support only emit the fatal log message.
int MXNVTXRangePop() {
  API_BEGIN();
#if !(MXNET_USE_CUDA && MXNET_USE_NVTX)
  LOG(FATAL) << "Compile with USE_CUDA=1 and USE_NVTX=1 to have NVTX support.";
#else
  mxnet::common::cuda::nvtx::gpuRangeStop();
#endif
  API_END();
}

// C API entry point: starts the CUDA profiler via cudaProfilerStart().
// Without CUDA + NVTX support compiled in, the call fails with a fatal log message.
int MXCUDAProfilerStart() {
  API_BEGIN();
#if MXNET_USE_CUDA && MXNET_USE_NVTX
  // Do not drop the CUDA status on the floor: surface a failure to the caller
  // the same way the #else branch reports an unsupported build.
  const cudaError_t err = cudaProfilerStart();
  if (err != cudaSuccess) {
    LOG(FATAL) << "cudaProfilerStart failed: " << cudaGetErrorString(err);
  }
#else
  LOG(FATAL) << "Compile with USE_CUDA=1 and USE_NVTX=1 to have CUDA profiler support.";
#endif
  API_END();
}

// C API entry point: stops the CUDA profiler via cudaProfilerStop().
// Without CUDA + NVTX support compiled in, the call fails with a fatal log message.
int MXCUDAProfilerStop() {
  API_BEGIN();
#if MXNET_USE_CUDA && MXNET_USE_NVTX
  // Do not drop the CUDA status on the floor: surface a failure to the caller
  // the same way the #else branch reports an unsupported build.
  const cudaError_t err = cudaProfilerStop();
  if (err != cudaSuccess) {
    LOG(FATAL) << "cudaProfilerStop failed: " << cudaGetErrorString(err);
  }
#else
  LOG(FATAL) << "Compile with USE_CUDA=1 and USE_NVTX=1 to have CUDA Profiler support.";
#endif
  API_END();
}
104 changes: 104 additions & 0 deletions src/common/cuda/nvtx.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#ifndef MXNET_COMMON_CUDA_NVTX_H_
#define MXNET_COMMON_CUDA_NVTX_H_

#if MXNET_USE_CUDA && MXNET_USE_NVTX
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvToolsExtCuda.h>
#include <vector>
#include <string>
#include <cstring>

namespace mxnet {
namespace common {
namespace cuda {

class NVTXDuration {
public:
explicit NVTXDuration(const char *name) noexcept
: range_id_(0), name_(name) {}

inline void start() {
range_id_ = nvtxRangeStartA(name_);
}

inline void stop() {
nvtxRangeEnd(range_id_);
}

private:
nvtxRangeId_t range_id_;
const char *name_;
};

// Utility class for NVTX: static helpers that open/close pushed ranges and
// choose a palette color for a range name.
class nvtx {
 public:
  // Palette of colors (make sure to add new colors to the vector in nameToColor()).
  static const uint32_t kRed = 0xFF0000;
  static const uint32_t kGreen = 0x00FF00;
  static const uint32_t kBlue = 0x0000FF;
  static const uint32_t kYellow = 0xB58900;
  static const uint32_t kOrange = 0xCB4B16;
  static const uint32_t kRed1 = 0xDC322F;
  static const uint32_t kMagenta = 0xD33682;
  static const uint32_t kViolet = 0x6C71C4;
  static const uint32_t kBlue1 = 0x268BD2;
  static const uint32_t kCyan = 0x2AA198;
  static const uint32_t kGreen1 = 0x859900;

  /*!
   * \brief Push a named, colored NVTX range.
   * \param rgb Color as 0xRRGGBB; the top byte is forced to 0xff before use.
   * \param range_name ASCII label of the range.
   */
  static void gpuRangeStart(const uint32_t rgb, const std::string& range_name) {
    // Value-initialize the struct so that every field not assigned below is
    // zero instead of whatever happened to be on the stack.
    nvtxEventAttributes_t att = {};
    att.version = NVTX_VERSION;
    att.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
    att.colorType = NVTX_COLOR_ARGB;
    att.color = rgb | 0xff000000;
    att.messageType = NVTX_MESSAGE_TYPE_ASCII;
    att.message.ascii = range_name.c_str();
    nvtxRangePushEx(&att);
  }

  /*!
   * \brief Map the first prefix_len characters of a range name to a palette color.
   *
   * The color is picked by hashing the prefix, so equal prefixes get the same
   * color within a process. A prefix_len larger than the name uses the whole name.
   * \param range_name Name of the range.
   * \param prefix_len Number of leading characters that take part in the hash.
   * \return One of the palette constants above.
   */
  static uint32_t nameToColor(const std::string& range_name, int prefix_len) {
    // The table is never modified after construction, so keep it const.
    static const std::vector<uint32_t> colors{kRed, kGreen, kBlue, kYellow, kOrange, kRed1,
                                              kMagenta, kViolet, kBlue1, kCyan, kGreen1};
    const std::string prefix(range_name, 0, prefix_len);
    const std::hash<std::string> hash_fn;
    return colors[hash_fn(prefix) % colors.size()];
  }

  // Same as above, hashing the complete range name.
  static uint32_t nameToColor(const std::string& range_name) {
    return nameToColor(range_name, range_name.size());
  }

  // Pop the range opened by gpuRangeStart().
  static void gpuRangeStop() {
    nvtxRangePop();
  }
};

} // namespace cuda
} // namespace common
} // namespace mxnet

#endif // MXNET_USE_CUDA && MXNET_USE_NVTX
#endif // MXNET_COMMON_CUDA_NVTX_H_
13 changes: 13 additions & 0 deletions src/engine/threaded_engine_perdevice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "./thread_pool.h"
#include "../common/lazy_alloc_array.h"
#include "../common/utils.h"
#include "../common/cuda/nvtx.h"

namespace mxnet {
namespace engine {
Expand Down Expand Up @@ -275,7 +276,19 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
OpenMP::Get()->on_start_worker_thread(false);

while (task_queue->Pop(&opr_block)) {
#if MXNET_USE_NVTX
auto nvtx_name = opr_block->opr->opr_name != "" ? opr_block->opr->opr_name : "Op";
auto end_pos = nvtx_name.find('{');
auto name_prefix_len = end_pos != std::string::npos
? end_pos
: nvtx_name.size();
auto color = common::cuda::nvtx::nameToColor(nvtx_name, name_prefix_len);
common::cuda::nvtx::gpuRangeStart(color, nvtx_name);
#endif
this->ExecuteOprBlock(run_ctx, opr_block);
#if MXNET_USE_NVTX
common::cuda::nvtx::gpuRangeStop();
#endif
}
#else
ready_event->signal();
Expand Down
21 changes: 18 additions & 3 deletions src/imperative/imperative_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -1282,7 +1282,7 @@ inline void CreateEngineOpSeg(const nnvm::IndexedGraph& idx,
std::vector<EngineOprSeg>* opr_segs) {
size_t seg_start = start_nid;
std::vector<std::shared_ptr<exec::OpExecutor> > seg_execs;
std::string opr_names;
std::string opr_names = "[";
for (size_t nid = start_nid; nid < end_nid; ++nid) {
const auto& node = idx[nid];
if (node.source->is_variable())
Expand All @@ -1302,6 +1302,8 @@ inline void CreateEngineOpSeg(const nnvm::IndexedGraph& idx,
auto& seg = (*opr_segs)[seg_start];
if (seg_execs.size()) {
seg = EngineOprSeg{false, nid};
opr_names.pop_back();
opr_names += "]";
seg.opr.reset(CreateEngineOp(default_ctx, seg_execs, opr_names.c_str()));
} else {
seg = EngineOprSeg{true, nid, nullptr};
Expand All @@ -1312,9 +1314,18 @@ inline void CreateEngineOpSeg(const nnvm::IndexedGraph& idx,
}

seg_execs.push_back(exec);
if (opr_names.size())
opr_names += ",";

const auto& inode = idx[nid];
opr_names += op_name;
opr_names += "{name=" + inode.source->attrs.name + ";";
const std::unordered_map<std::string, std::string> &dict = inode.source->attrs.dict;
auto num_dict_entries = dict.size();
for (auto &k : dict) {
opr_names += k.first + "=" + k.second;
if (--num_dict_entries != 0)
opr_names += ";";
}
opr_names += "},";

auto& seg = (*opr_segs)[nid];
if (!valid) {
Expand All @@ -1324,6 +1335,8 @@ inline void CreateEngineOpSeg(const nnvm::IndexedGraph& idx,
seg_start = nid + 1;
} else if (is_async) {
seg = EngineOprSeg{false, nid + 1};
opr_names.pop_back();
opr_names += "]";
seg.opr.reset(CreateEngineOp(default_ctx, seg_execs, opr_names.c_str()));
seg_execs.clear();
opr_names.clear();
Expand All @@ -1335,6 +1348,8 @@ inline void CreateEngineOpSeg(const nnvm::IndexedGraph& idx,
auto& seg = (*opr_segs)[seg_start];
if (seg_execs.size()) {
seg = EngineOprSeg{false, end_nid};
opr_names.pop_back();
opr_names += "]";
seg.opr.reset(CreateEngineOp(default_ctx, seg_execs, opr_names.c_str()));
} else {
seg = EngineOprSeg{true, end_nid, nullptr};
Expand Down
20 changes: 0 additions & 20 deletions src/profiler/nvtx.cc

This file was deleted.

Loading