Commit 31699d7

Author: Aaron Boxer
benchmarks: add benchmark comparing taskflow to good old thread pool (taskflow#383)
Thanks!!
1 parent f87d598 commit 31699d7

File tree

4 files changed: +227 -1 lines changed
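For orientation before the diffs, the sketch below condenses the two execution paths this benchmark compares: running a batch of independent tasks through a tf::Executor versus pushing the same batch through the simple ThreadPool added in this commit. It is an illustrative, minimal program (not part of the commit), assuming only the Taskflow API and the ThreadPool.hpp header shown in the files below.

#include <taskflow/taskflow.hpp>
#include "ThreadPool.hpp"   // the simple pool added by this commit
#include <cstdint>
#include <future>
#include <vector>

// The pool's static singleton members must be defined once per program,
// exactly as benchmark.cpp does below.
ThreadPool* ThreadPool::singleton = nullptr;
std::mutex ThreadPool::singleton_mutex;

int main() {
  const uint64_t n = 1000;

  // Path 1: Taskflow -- build a graph of n independent tasks and run it on an executor.
  tf::Executor executor;
  tf::Taskflow taskflow;
  for (uint64_t i = 0; i < n; ++i)
    taskflow.emplace([] { /* per-task work */ });
  executor.run(taskflow).wait();

  // Path 2: plain thread pool -- enqueue n callables and block on the returned futures.
  std::vector<std::future<int>> results;
  for (uint64_t i = 0; i < n; ++i)
    results.emplace_back(ThreadPool::get()->enqueue([] { return 0; }));
  for (auto& r : results)
    r.get();

  ThreadPool::release();   // joins the pool's worker threads
  return 0;
}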

benchmarks/CMakeLists.txt

Lines changed: 14 additions & 0 deletions

@@ -264,6 +264,20 @@ target_link_libraries(
 )
 set_target_properties(graph_pipeline PROPERTIES COMPILE_FLAGS ${OpenMP_CXX_FLAGS})
 
+## benchmark 13: comparison with simple thread pool
+add_executable(
+  threadpool
+  ${TF_BENCHMARK_DIR}/threadpool/benchmark.cpp
+)
+target_include_directories(threadpool PRIVATE ${PROJECT_SOURCE_DIR}/3rd-party/CLI11)
+target_link_libraries(
+  threadpool
+  ${PROJECT_NAME}
+  ${TBB_IMPORTED_TARGETS}
+  ${OpenMP_CXX_LIBRARIES}
+  tf::default_settings
+)
+set_target_properties(threadpool PROPERTIES COMPILE_FLAGS ${OpenMP_CXX_FLAGS})
 
 ###############################################################################
 # CUDA benchmarks

benchmarks/benchmarks.md

Lines changed: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ Results are illustrated in a plot and saved to `result.png`.
 -o result.png
 ```
 
-When the program completes, you will see a combined plot of all specified benchmarsk.
+When the program completes, you will see a combined plot of all specified benchmarks.
 The x-axis represents the growth of problem size and the y-axis denotes the runtime
 in millisecond.

benchmarks/threadpool/ThreadPool.hpp

Lines changed: 129 additions & 0 deletions

@@ -0,0 +1,129 @@
#pragma once

#include <vector>
#include <queue>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <functional>
#include <stdexcept>
#include <map>
#include <type_traits>
#include <iostream>
#include <cassert>   // for assert() in enqueue()
#include <cstdint>   // for uint32_t in the singleton helpers

// A minimal fixed-size thread pool with a lazily created global singleton,
// used as the baseline that the taskflow benchmark compares against.
class ThreadPool
{
public:
  ThreadPool(size_t);
  template<class F, class... Args>
  auto enqueue(F&& f, Args&&... args)
    -> std::future<typename std::invoke_result<F, Args...>::type>;
  ~ThreadPool();
  int thread_number(std::thread::id id)
  {
    if(id_map.find(id) != id_map.end())
      return (int)id_map[id];
    return -1;
  }
  size_t num_threads()
  {
    return num_threads_;
  }

  static ThreadPool* get()
  {
    return instance(0);
  }
  static ThreadPool* instance(uint32_t numthreads)
  {
    std::unique_lock<std::mutex> lock(singleton_mutex);
    if(!singleton)
      singleton = new ThreadPool(numthreads ? numthreads : hardware_concurrency());
    return singleton;
  }
  static void release()
  {
    std::unique_lock<std::mutex> lock(singleton_mutex);
    delete singleton;
    singleton = nullptr;
  }
  static uint32_t hardware_concurrency()
  {
    return std::thread::hardware_concurrency();
  }

private:
  std::vector<std::thread> workers;
  std::queue<std::function<void()>> tasks;
  std::mutex queue_mutex;
  std::condition_variable condition;
  bool stop;
  std::map<std::thread::id, size_t> id_map;
  size_t num_threads_;
  static ThreadPool* singleton;
  static std::mutex singleton_mutex;
};

inline ThreadPool::ThreadPool(size_t threads) : stop(false), num_threads_(threads)
{
  // with a single thread, no workers are spawned (enqueue() is then disallowed by its assert)
  if(threads == 1)
    return;

  for(size_t i = 0; i < threads; ++i)
    workers.emplace_back([this] {
      for(;;)
      {
        std::function<void()> task;
        {
          std::unique_lock<std::mutex> lock(this->queue_mutex);
          this->condition.wait(lock,
                               [this] { return this->stop || !this->tasks.empty(); });
          if(this->stop && this->tasks.empty())
            return;
          task = std::move(this->tasks.front());
          this->tasks.pop();
        }
        task();
      }
    });
  size_t thread_count = 0;
  for(std::thread& worker : workers)
  {
    id_map[worker.get_id()] = thread_count;
    thread_count++;
  }
}

// add new work item to the pool
template<class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
  -> std::future<typename std::invoke_result<F, Args...>::type>
{
  assert(num_threads_ > 1);
  using return_type = typename std::invoke_result<F, Args...>::type;

  auto task = std::make_shared<std::packaged_task<return_type()>>(
    std::bind(std::forward<F>(f), std::forward<Args>(args)...));

  std::future<return_type> res = task->get_future();
  {
    std::unique_lock<std::mutex> lock(queue_mutex);
    if(stop)
      throw std::runtime_error("enqueue on stopped ThreadPool");

    tasks.emplace([task]() { (*task)(); });
  }
  condition.notify_one();
  return res;
}

inline ThreadPool::~ThreadPool()
{
  {
    std::unique_lock<std::mutex> lock(queue_mutex);
    stop = true;
  }
  condition.notify_all();
  for(std::thread& worker : workers)
    worker.join();
}
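As a quick usage note (not part of the commit), this is how the pool's public interface composes: enqueue() forwards arbitrary arguments to the callable and hands back a std::future of its result, while release() tears the singleton down and joins the workers. A minimal sketch, assuming the header above:

#include "ThreadPool.hpp"
#include <cstdio>

// Static singleton members, defined once per program (benchmark.cpp does the same).
ThreadPool* ThreadPool::singleton = nullptr;
std::mutex ThreadPool::singleton_mutex;

int main() {
  ThreadPool* pool = ThreadPool::instance(4);                  // singleton with 4 workers
  auto fut = pool->enqueue([](int a, int b) { return a + b; }, 2, 3);
  std::printf("2 + 3 = %d on a pool of %zu threads\n",
              fut.get(), pool->num_threads());                 // get() blocks until the task ran
  ThreadPool::release();                                       // destructor joins the workers
  return 0;
}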
benchmarks/threadpool/benchmark.cpp

Lines changed: 83 additions & 0 deletions

@@ -0,0 +1,83 @@
#include <taskflow/taskflow.hpp>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>
#include <future>
#include "ThreadPool.hpp"

ThreadPool* ThreadPool::singleton = nullptr;
std::mutex ThreadPool::singleton_mutex;
tf::Executor executor;

class ChronoTimer {
public:
  ChronoTimer(void) {
  }
  void start(void){
    startTime = std::chrono::high_resolution_clock::now();
  }
  void finish(std::string msg){
    auto finish = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = finish - startTime;
    printf("%s : %f ms\n", msg.c_str(), elapsed.count() * 1000);
  }
private:
  std::chrono::high_resolution_clock::time_point startTime;
};

// busy-work kernel; note that acc is never read, so an optimizing compiler may elide the loop
void benchFunc(uint64_t loopLen){
  float acc = 0;
  for (uint64_t k = 0; k < loopLen; ++k)
    acc += k;
}

void bench(uint32_t iter){
  printf("Benchmark with %u iterations\n", iter);
  const uint64_t num_blocks = 1000;
  const uint64_t loopLen = 100;
  ChronoTimer timer;
  ThreadPool *pool = ThreadPool::get();

  // taskflow: each iteration builds a fresh graph of num_blocks independent tasks and runs it
  timer.start();
  for (uint64_t it = 0; it < iter; ++it) {
    tf::Taskflow taskflow;
    tf::Task node[num_blocks];
    for (uint64_t i = 0; i < num_blocks; i++)
      node[i] = taskflow.placeholder();
    for (uint64_t i = 0; i < num_blocks; i++) {
      node[i].work([=]() {
        benchFunc(loopLen);
      });
    }
    executor.run(taskflow).wait();
  }
  timer.finish("taskflow: time in ms: ");

  // thread pool: each iteration enqueues the same work and waits on all returned futures
  timer.start();
  for (uint64_t it = 0; it < iter; ++it) {
    std::vector<std::future<int>> results;
    for (uint64_t i = 0; i < num_blocks; i++) {
      results.emplace_back(pool->enqueue([=]() {
        benchFunc(loopLen);
        return 0;
      }));
    }
    for(auto& result : results)
    {
      result.get();
    }
  }
  timer.finish("threadpool: time in ms: ");
}

int main() {
  for (uint32_t i = 0; i < 5; ++i)
    bench(100);
  for (uint32_t i = 0; i < 5; ++i)
    bench(50);
  for (uint32_t i = 0; i < 5; ++i)
    bench(20);
  for (uint32_t i = 0; i < 5; ++i)
    bench(10);
  for (uint32_t i = 0; i < 5; ++i)
    bench(5);
}
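One design note on the measurement: the taskflow loop above rebuilds the graph (placeholder() plus work()) on every iteration, so graph construction is included in its timing, whereas the thread-pool loop only pays for enqueueing. A hypothetical variant (an assumption, not part of the commit) that builds the graph once with Taskflow's emplace() and re-runs it would isolate execution time:

#include <taskflow/taskflow.hpp>
#include <chrono>
#include <cstdint>
#include <cstdio>

// Hypothetical helper, not in the commit: time only the repeated execution of a
// pre-built graph of num_blocks independent tasks.
void bench_taskflow_prebuilt(tf::Executor& executor, uint32_t iter) {
  const uint64_t num_blocks = 1000;
  const uint64_t loopLen = 100;

  tf::Taskflow taskflow;
  for (uint64_t i = 0; i < num_blocks; i++) {
    taskflow.emplace([=]() {
      float acc = 0;
      for (uint64_t k = 0; k < loopLen; ++k)
        acc += k;                           // same busy-work as benchFunc above
    });
  }

  auto t0 = std::chrono::high_resolution_clock::now();
  for (uint32_t it = 0; it < iter; ++it)
    executor.run(taskflow).wait();          // re-run the same graph each iteration
  std::chrono::duration<double> elapsed = std::chrono::high_resolution_clock::now() - t0;
  std::printf("taskflow (graph built once): %f ms\n", elapsed.count() * 1000);
}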

0 commit comments