@@ -1973,57 +1973,48 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     std::vector<TensorStorage> processed_tensor_storages;
 
     {
+        std::unordered_map<std::string, TensorStorage> processed_map;
+        std::mutex map_mutex;
+
         int n_threads = std::min((int)std::thread::hardware_concurrency(), (int)tensor_storages.size());
         if (n_threads < 1) {
             n_threads = 1;
         }
-
-        std::vector<std::unordered_map<std::string, TensorStorage>> local_maps(n_threads);
         std::vector<std::thread> workers;
-        size_t chunk_size = (tensor_storages.size() + n_threads - 1) / n_threads;
 
         for (int i = 0; i < n_threads; ++i) {
             workers.emplace_back([&, thread_id = i]() {
-                const size_t start = thread_id * chunk_size;
-                const size_t end = std::min(start + chunk_size, tensor_storages.size());
-
+
+                std::unordered_map<std::string, TensorStorage> local_processed_map;
                 std::vector<TensorStorage> temp_storages;
-                for (size_t j = start; j < end; ++j) {
+
+                for (size_t j = thread_id; j < tensor_storages.size(); j += n_threads) {
                     const auto& tensor_storage = tensor_storages[j];
                     if (is_unused_tensor(tensor_storage.name)) {
                         continue;
                     }
-
+
                     temp_storages.clear();
                     preprocess_tensor(tensor_storage, temp_storages);
-
-                    for (size_t k = 0; k < temp_storages.size(); ++k) {
-                        local_maps[thread_id][temp_storages[k].name] = temp_storages[k];
+
+                    for (const auto& ts : temp_storages) {
+                        local_processed_map[ts.name] = ts;
                     }
                 }
-            });
-        }
-
-        for (size_t i = 0; i < workers.size(); ++i) {
-            workers[i].join();
-        }
 
-        std::unordered_map<std::string, TensorStorage> processed_map;
-        size_t total_keys = 0;
-        for (int i = 0; i < n_threads; ++i) {
-            total_keys += local_maps[i].size();
+                if (!local_processed_map.empty()) {
+                    std::lock_guard<std::mutex> lock(map_mutex);
+                    processed_map.merge(local_processed_map);
+                }
+            });
         }
-        processed_map.reserve(total_keys);
-
-        for (int i = 0; i < n_threads; ++i) {
-            for (std::unordered_map<std::string, TensorStorage>::const_iterator it = local_maps[i].begin(); it != local_maps[i].end(); ++it) {
-                processed_map[it->first] = it->second;
-            }
+        for (auto& w : workers) {
+            w.join();
         }
-
+
         processed_tensor_storages.reserve(processed_map.size());
-        for (std::unordered_map<std::string, TensorStorage>::const_iterator it = processed_map.begin(); it != processed_map.end(); ++it) {
-            processed_tensor_storages.push_back(it->second);
+        for (auto const& [name, ts] : processed_map) {
+            processed_tensor_storages.push_back(ts);
         }
     }
 
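The diff replaces the contiguous chunk partition (`chunk_size`, `start`, `end`) with an interleaved stride (`j = thread_id; j += n_threads`), drops the pre-allocated `local_maps` vector in favor of a map local to each lambda, and merges results into the shared `processed_map` under a mutex inside each worker instead of combining all the per-thread maps serially after the joins. Below is a minimal, self-contained sketch of the same pattern; `Item` and `preprocess()` are hypothetical stand-ins for `TensorStorage` and `preprocess_tensor()`, not types from the actual codebase:

```cpp
// Sketch of the worker pattern from this diff, using stand-in types.
#include <algorithm>
#include <cstdio>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

struct Item {
    std::string name;
    int value = 0;
};

// Stand-in for preprocess_tensor(): may emit zero or more outputs per input.
static void preprocess(const Item& in, std::vector<Item>& out) {
    out.push_back({in.name, in.value * 2});
}

int main() {
    std::vector<Item> inputs;
    for (int i = 0; i < 1000; ++i) {
        inputs.push_back({"item_" + std::to_string(i), i});
    }

    std::unordered_map<std::string, Item> processed_map;
    std::mutex map_mutex;

    int n_threads = std::min((int)std::thread::hardware_concurrency(), (int)inputs.size());
    if (n_threads < 1) {
        n_threads = 1;
    }

    std::vector<std::thread> workers;
    for (int i = 0; i < n_threads; ++i) {
        workers.emplace_back([&, thread_id = i]() {
            std::unordered_map<std::string, Item> local_map;
            std::vector<Item> temp;
            // Interleaved partition: no chunk-size arithmetic, and every
            // thread gets work as long as thread_id < inputs.size().
            for (size_t j = thread_id; j < inputs.size(); j += n_threads) {
                temp.clear();
                preprocess(inputs[j], temp);
                for (const auto& it : temp) {
                    local_map[it.name] = it;  // last write wins within a thread
                }
            }
            if (!local_map.empty()) {
                // One short critical section per thread, not per item.
                std::lock_guard<std::mutex> lock(map_mutex);
                processed_map.merge(local_map);
            }
        });
    }
    for (auto& w : workers) {
        w.join();
    }

    std::printf("processed %zu unique items\n", processed_map.size());
    return 0;
}
```

One behavioral detail worth noting: C++17 `unordered_map::merge` splices nodes rather than copying, but on a duplicate key it keeps the element already in the destination and leaves the source's node behind. The old serial combine (`processed_map[it->first] = it->second`) overwrote instead, so if two threads ever produced the same tensor name, this change shifts the winner from "last combined" to "first merged".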