forked from OpenNMT/CTranslate2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslator_pool.h
More file actions
118 lines (102 loc) · 4.35 KB
/
Copy pathtranslator_pool.h
File metadata and controls
118 lines (102 loc) · 4.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#pragma once
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <functional>
#include <future>
#include <istream>
#include <mutex>
#include <ostream>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "translator.h"
namespace ctranslate2 {
// A batch of tokenized sequences: each inner vector holds the string tokens of one sequence.
// Used in this header both for the source batch and for the optional target prefix.
using TranslationInput = std::vector<std::vector<std::string>>;
// The list of TranslationResult objects delivered by the future of one translation job.
using TranslationOutput = std::vector<TranslationResult>;
// A pool of Translators running in parallel.
class TranslatorPool {
public:
// "args" are forwarded to the Translator constructor.
template <typename... Args>
TranslatorPool(size_t num_replicas, size_t num_threads_per_replica, Args&&... args) {
set_num_threads(num_threads_per_replica);
_translator_pool.emplace_back(std::forward<Args>(args)...);
// On GPU, we currently don't benefit much from running instances in parallel, even
// when using separate streams. This could be revisited/improved in the future.
if (_translator_pool.back().device() == Device::CUDA)
num_replicas = 1;
for (size_t i = 1; i < num_replicas; ++i)
_translator_pool.emplace_back(_translator_pool.front());
for (auto& translator : _translator_pool)
_workers.emplace_back(&TranslatorPool::work_loop,
this,
std::ref(translator),
num_threads_per_replica);
}
~TranslatorPool();
// Run a translation job asynchronously.
std::future<TranslationOutput> post(const TranslationInput& source,
const TranslationOptions& options);
std::future<TranslationOutput> post(const TranslationInput& source,
const TranslationInput& target_prefix,
const TranslationOptions& options);
// Translate a stream in parallel.
// Results will be written in order as they are available so the stream content is
// never stored fully in memory.
template <typename Reader, typename Writer>
void consume_stream(std::istream& in,
std::ostream& out,
size_t max_batch_size,
const TranslationOptions& options,
Reader& reader,
Writer& writer) {
std::queue<std::future<TranslationOutput>> futures;
auto pop_results = [&futures, &out, &writer](bool blocking) {
static const auto zero_sec = std::chrono::seconds(0);
while (!futures.empty()
&& (blocking
|| futures.front().wait_for(zero_sec) == std::future_status::ready)) {
for (const auto& result : futures.front().get())
writer(out, result);
futures.pop();
}
};
TranslationInput batch_tokens;
std::vector<std::string> tokens;
while (reader(in, tokens)) {
batch_tokens.push_back(tokens);
tokens.clear();
if (batch_tokens.size() == max_batch_size) {
futures.emplace(post(batch_tokens, options));
batch_tokens.clear();
}
pop_results(false /* blocking */);
}
if (!batch_tokens.empty())
futures.emplace(post(batch_tokens, options));
pop_results(true /* blocking */);
}
// Translate a file in parallel.
// These are wrappers around consume_stream that set the appropriate reader and writer.
// The returned value is the total number of produced tokens.
size_t consume_text_file(const std::string& in_file,
const std::string& out_file,
size_t max_batch_size,
const TranslationOptions& options,
bool with_scores = false);
size_t consume_text_file(std::istream& in,
std::ostream& out,
size_t max_batch_size,
const TranslationOptions& options,
bool with_scores = false);
private:
struct TranslationJob {
TranslationInput source;
TranslationInput target_prefix;
TranslationOptions options;
};
void work_loop(Translator& translator, size_t intra_threads);
std::queue<std::pair<TranslationJob, std::promise<TranslationOutput>>> _work;
std::vector<std::thread> _workers;
std::vector<Translator> _translator_pool;
std::mutex _mutex;
std::condition_variable _cv;
bool _request_end = false;
};
}