Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2158bb0

Browse files
committed
updated cudaflow
1 parent 5059c26 commit 2158bb0

File tree

9 files changed

+538
-86
lines changed

9 files changed

+538
-86
lines changed

CMakeLists.txt

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,13 @@ target_include_directories(${PROJECT_NAME} INTERFACE
230230
# $<INSTALL_INTERFACE:include/>
231231
#)
232232
#
233-
#add_library(TensorFrame INTERFACE)
234-
#target_compile_features(TensorFrame INTERFACE cxx_std_17)
235-
#target_link_libraries(TensorFrame INTERFACE Threads::Threads)
236-
#target_include_directories(TensorFrame INTERFACE
237-
# $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
238-
# $<INSTALL_INTERFACE:include/>
239-
#)
233+
add_library(TensorFrame INTERFACE)
234+
target_compile_features(TensorFrame INTERFACE cxx_std_17)
235+
target_link_libraries(TensorFrame INTERFACE Threads::Threads)
236+
target_include_directories(TensorFrame INTERFACE
237+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
238+
$<INSTALL_INTERFACE:include/>
239+
)
240240

241241
# -----------------------------------------------------------------------------
242242
# Example program
@@ -321,12 +321,12 @@ target_link_libraries(
321321
)
322322
endif(CMAKE_CUDA_COMPILER AND TF_BUILD_CUDA)
323323

324-
##### TensorFrame Project
325-
#set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${TF_EXAMPLE_DIR}/tensorframe)
326-
#add_executable(add ${TF_EXAMPLE_DIR}/tensorframe/add.cpp)
327-
#target_link_libraries(
328-
# add TensorFrame Threads::Threads tf::default_settings
329-
#)
324+
#### TensorFrame Project
325+
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${TF_EXAMPLE_DIR}/tensorframe)
326+
add_executable(add ${TF_EXAMPLE_DIR}/tensorframe/add.cpp)
327+
target_link_libraries(
328+
add TensorFrame Threads::Threads tf::default_settings
329+
)
330330

331331

332332
#### TaskflowDSL project
@@ -841,6 +841,27 @@ add_test(cuda_kmeans.1000.4C4G ${TF_UTEST_CUDA_KMEANS} -tc=kmeans.1000.4C4G)
841841
add_test(cuda_kmeans.1000.8C8G ${TF_UTEST_CUDA_KMEANS} -tc=kmeans.1000.8C8G)
842842
add_test(cuda_kmeans.1000.16C16G ${TF_UTEST_CUDA_KMEANS} -tc=kmeans.1000.16C16G)
843843

844+
# algorithm test
845+
add_executable(cuda_algorithm ${TF_UTEST_DIR}/cuda/cuda_algorithm.cu)
846+
target_link_libraries(
847+
cuda_algorithm ${PROJECT_NAME} Threads::Threads tf::default_settings
848+
)
849+
target_include_directories(cuda_algorithm PRIVATE ${TF_3RD_PARTY_DIR}/doctest)
850+
set(TF_UTEST_CUDA_ALG ${TF_UTEST_DIR}/cuda/cuda_algorithm)
851+
add_test(cuda_alg.add2.int ${TF_UTEST_CUDA_ALG} -tc=add2.int)
852+
add_test(cuda_alg.add2.float ${TF_UTEST_CUDA_ALG} -tc=add2.float)
853+
add_test(cuda_alg.add2.double ${TF_UTEST_CUDA_ALG} -tc=add2.double)
854+
add_test(cuda_alg.add3.int ${TF_UTEST_CUDA_ALG} -tc=add3.int)
855+
add_test(cuda_alg.add3.float ${TF_UTEST_CUDA_ALG} -tc=add3.float)
856+
add_test(cuda_alg.add3.double ${TF_UTEST_CUDA_ALG} -tc=add3.double)
857+
add_test(cuda_alg.multiply2.int ${TF_UTEST_CUDA_ALG} -tc=multiply2.int)
858+
add_test(cuda_alg.multiply2.float ${TF_UTEST_CUDA_ALG} -tc=multiply2.float)
859+
add_test(cuda_alg.multiply2.double ${TF_UTEST_CUDA_ALG} -tc=multiply2.double)
860+
add_test(cuda_alg.for_each.int ${TF_UTEST_CUDA_ALG} -tc=for_each.int)
861+
add_test(cuda_alg.for_each.float ${TF_UTEST_CUDA_ALG} -tc=for_each.float)
862+
add_test(cuda_alg.for_each.double ${TF_UTEST_CUDA_ALG} -tc=for_each.double)
863+
864+
844865
endif(CMAKE_CUDA_COMPILER AND TF_BUILD_CUDA)
845866

846867
endif(TF_BUILD_TESTS)

examples/tensorframe/add.cpp

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,36 @@
11
#include <taskflow/tensorframe.hpp>
22

33
int main(){
4+
5+
using type = decltype(std::declval<int>() + std::declval<int>());
6+
7+
tf::Tensor<float> tensor1({2, 3, 3, 4}, 10);
8+
tf::Tensor<float> tensor2({2, 3, 3, 4}, 10);
9+
tf::Tensor<float> tensor3({2, 3, 3, 4}, 10);
410

5-
tf::Tensor<float> tensor({2, 3, 3, 4}, 10);
6-
7-
tensor.dump(std::cout);
11+
tensor1.dump(std::cout);
812

9-
std::cout << tensor.flat_chunk_index(1, 2, 2, 3) << '\n';
10-
std::cout << tensor.flat_index(1, 2, 2, 3) << '\n';
11-
std::cout << tensor.chunk_size() << '\n';
13+
std::cout << tensor1.flat_chunk_index(1, 2, 2, 3) << '\n';
14+
std::cout << tensor1.flat_index(1, 2, 2, 3) << '\n';
15+
std::cout << tensor1.chunk_size() << '\n';
16+
17+
tf::TensorFrame<float> frame;
18+
19+
auto expr1 = frame.input(tensor1);
20+
auto expr2 = frame.input(tensor2);
21+
auto expr3 = frame.add(expr1, expr2);
22+
auto expr4 = frame.output(tensor3);
23+
24+
// todo
25+
// tf::Executor executor;
26+
// frame.optimize(OptimizationLevel=CPU);
27+
// executor.run(frame).wait();
28+
//
29+
// // now tensor2 has the value of tensor
1230

1331

1432
return 0;
1533
}
1634

35+
36+

taskflow/core/graph.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ class Node {
165165
constexpr static auto CUDAFLOW_WORK = get_index_v<cudaFlowWork, handle_t>;
166166
#endif
167167

168-
template <typename ...Args>
168+
template <typename... Args>
169169
Node(Args&&... args);
170170

171171
~Node();
@@ -247,7 +247,7 @@ Node::ModuleWork::ModuleWork(T&& tf) : module {tf} {
247247
// ----------------------------------------------------------------------------
248248

249249
// Constructor
250-
template <typename ...Args>
250+
template <typename... Args>
251251
Node::Node(Args&&... args): _handle{std::forward<Args>(args)...} {
252252
}
253253

0 commit comments

Comments
 (0)