From e4fae532aab6b25c9d4fe65a307b4470b1c0e680 Mon Sep 17 00:00:00 2001
From: Sumeet Chhetri <sumeet.chhetri@gmail.com>
Date: Sat, 6 Nov 2021 20:05:26 +0530
Subject: [PATCH] Fixed issues with prepared statements not getting applied in
 libpq; fixed issues with RequestHandler2 not being able to support the libpq
 impl; added support for queued writes; updated csv.hpp and concurrentqueue
 headers [skip_build]

---
 CMakeLists.txt                                |   7 +
 Makefile.am                                   |   1 +
 configure.ac                                  |   1 +
 .../install_ffead-cpp-framework-forsql.sh     |   1 +
 .../ffead-cpp/install_ffead-cpp-framework.sh  |   1 +
 .../techempower-config/benchmark_config.json  |  86 +--
 .../ffead-cpp-base-debug.dockerfile           |   3 +-
 .../ffead-cpp-base.dockerfile                 |   3 +-
 ...l-raw-async-qw-clibpqb-profiled.dockerfile |   7 +
 ...ql-raw-async-qw-pool-profiled-m.dockerfile |   7 +
 ...ostgresql-raw-async-qw-profiled.dockerfile |   7 +
 .../ffead-cpp-seastar-base.dockerfile         |   1 +
 ...sync-clibpqb-pool-profiled-base.dockerfile |   5 -
 ...raw-async-clibpqb-profiled-base.dockerfile |   5 -
 .../install_ffead-cpp-framework.sh            |   3 +
 .../install_ffead-cpp-sql-raw-profiled.sh     |  13 +
 docker/techempower-config/run_ffead.sh        |  26 +-
 .../sql-async-profiled-install-clang-dbg.sh   |   2 +
 .../sql-async-profiled-install-clang.sh       |   3 +
 .../sql-async-profiled-install.sh             |   3 +
 .../techempower-config/sql-profiled-util.sh   |   6 +-
 ...kerFile-UbuntuBionic-x64-ffead-cpp-fortest |   4 +
 ...File-UbuntuBionic-x64-ffead-cpp-fortest-nb |   4 +
 ...untuBionic-x64-ffead-cpp-fortest_localhost |   4 +
 docker/test/mongodb/create.js                 |   2 +-
 docker/test/mysql/create.sql                  |  13 +-
 docker/test/postgresql/pg_hba.conf            |   2 +-
 docker/test/postgresql/pgdg.list              |   2 +-
 docker/test/postgresql/postgresql.conf        |   6 +-
 .../DockerFile-UbuntuBionic-x64-ffead-cpp     |   5 +-
 ...erFile-UbuntuBionic-x64-ffead-cpp_autoconf |   5 +-
 meson.build                                   |   1 +
 resources/server.prop                         |   2 +-
 script/server.sh                              |   4 +-
 src/modules/common/AppDefines.h               |  39 +-
 src/modules/common/blockingconcurrentqueue.h  | 439 +----------
 src/modules/common/concurrentqueue.h          | 581 ++++++++------
 src/modules/common/csv.hpp                    |   2 +-
 src/modules/common/lightweightsemaphore.h     | 411 ++++++++++
 .../sdorm/sql/libpq/LibpqDataSourceImpl.cpp   | 110 +--
 .../sdorm/sql/libpq/LibpqDataSourceImpl.h     |   7 +-
 src/modules/server-util/RequestHandler2.cpp   |  31 +-
 src/modules/server-util/RequestHandler2.h     |   6 +-
 src/modules/server-util/SocketInterface.cpp   |   7 +
 src/modules/server-util/SocketInterface.h     |  36 +-
 src/modules/ssl/SSLCommon.cpp                 |   1 +
 src/server/embedded/CHServer.cpp              |  11 +-
 .../CMakeLists.txt                            |  14 +
 .../config/application.xml                    |  24 +
 .../config/sdorm.xml                          |  15 +
 .../include/TeBkUmLpqQwAsync.h                | 192 +++++
 web/te-benchmark-um-pq-async-qw/meson.build   |  15 +
 .../src/TeBkUmLpqQwAsync.cpp                  | 715 ++++++++++++++++++
 .../src/autotools/Makefile.am                 |  17 +
 .../tpe/fortunes.tpe                          |  13 +
 web/te-benchmark-um-pq-async-qw/xmake.lua     |  12 +
 xmake.lua                                     |   1 +
 57 files changed, 2126 insertions(+), 818 deletions(-)
 create mode 100644 docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-clibpqb-profiled.dockerfile
 create mode 100644 docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-pool-profiled-m.dockerfile
 create mode 100644 docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-profiled.dockerfile
 create mode 100644 src/modules/common/lightweightsemaphore.h
 create mode 100644 web/te-benchmark-um-pq-async-qw/CMakeLists.txt
 create mode 100644 web/te-benchmark-um-pq-async-qw/config/application.xml
 create mode 100644 web/te-benchmark-um-pq-async-qw/config/sdorm.xml
 create mode 100644 web/te-benchmark-um-pq-async-qw/include/TeBkUmLpqQwAsync.h
 create mode 100644 web/te-benchmark-um-pq-async-qw/meson.build
 create mode 100644 web/te-benchmark-um-pq-async-qw/src/TeBkUmLpqQwAsync.cpp
 create mode 100644 web/te-benchmark-um-pq-async-qw/src/autotools/Makefile.am
 create mode 100644 web/te-benchmark-um-pq-async-qw/tpe/fortunes.tpe
 create mode 100644 web/te-benchmark-um-pq-async-qw/xmake.lua

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a8227e218..abb2158ca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -743,6 +743,7 @@ endif()
 if(MOD_SDORM_SQL)
 	add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq)
 	add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async)
+	add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async-qw)
 endif()
 
 if(MOD_APACHE)
@@ -863,6 +864,7 @@ endif()
 if(NOT MOD_SDORM_SQL)
 	install(CODE "execute_process(COMMAND rm -rf ${PROJECT_NAME}-bin/web/te-benchmark-um-pq)")
 	install(CODE "execute_process(COMMAND rm -rf ${PROJECT_NAME}-bin/web/te-benchmark-um-pq-async)")
+	install(CODE "execute_process(COMMAND rm -rf ${PROJECT_NAME}-bin/web/te-benchmark-um-pq-async-qw)")
 endif()
 install(DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ DESTINATION ${PROJECT_NAME}-bin/tests)
 if(CYGWIN OR MINGW)
@@ -892,8 +894,10 @@ if(CYGWIN OR MINGW)
 		if(MOD_SDORM_SQL)
 			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq/cygte-benchmark-um-pq${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
 			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async/cygte-benchmark-um-pq-async${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
+			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async-qw/cygte-benchmark-um-pq-async-qw${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
 			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq/libte-benchmark-um-pq${LIB_EXT}.a DESTINATION ${PROJECT_NAME}-bin/lib)
 			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async/libte-benchmark-um-pq-async${LIB_EXT}.a DESTINATION ${PROJECT_NAME}-bin/lib)
+			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async-qw/libte-benchmark-um-pq-async-qw${LIB_EXT}.a DESTINATION ${PROJECT_NAME}-bin/lib)
 		endif()
 	else()
 		install(FILES ${PROJECT_BINARY_DIR}/src/modules/libffead-modules${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
@@ -911,6 +915,7 @@ if(CYGWIN OR MINGW)
 		if(MOD_SDORM_SQL)
 			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq/libte-benchmark-um-pq${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
 			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async/libte-benchmark-um-pq-async${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
+			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async-qw/libte-benchmark-um-pq-async-qw${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
 		endif()
 		install(FILES ${PROJECT_BINARY_DIR}/src/modules/libffead-modules${LIB_EXT}.a DESTINATION ${PROJECT_NAME}-bin/lib)
 		install(FILES ${PROJECT_BINARY_DIR}/src/framework/libffead-framework${LIB_EXT}.a DESTINATION ${PROJECT_NAME}-bin/lib)
@@ -927,6 +932,7 @@ if(CYGWIN OR MINGW)
 		if(MOD_SDORM_SQL)
 			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq/libte-benchmark-um-pq${LIB_EXT}.a DESTINATION ${PROJECT_NAME}-bin/lib)
 			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async/libte-benchmark-um-pq-async${LIB_EXT}.a DESTINATION ${PROJECT_NAME}-bin/lib)
+			install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async-qw/libte-benchmark-um-pq-async-qw${LIB_EXT}.a DESTINATION ${PROJECT_NAME}-bin/lib)
 		endif()
 	endif()
 	install(FILES ${PROJECT_BINARY_DIR}/ffead-cpp.exe DESTINATION ${PROJECT_NAME}-bin/)
@@ -952,6 +958,7 @@ else()
 	if(MOD_SDORM_SQL)
 		install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq/libte-benchmark-um-pq${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
 		install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async/libte-benchmark-um-pq-async${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
+		install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async-qw/libte-benchmark-um-pq-async-qw${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)
 	endif()
 	if(SRV_ALL)
 		install(FILES ${PROJECT_BINARY_DIR}/ffead-cpp DESTINATION ${PROJECT_NAME}-bin/)
diff --git a/Makefile.am b/Makefile.am
index 8e4827f47..986ad3576 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -249,6 +249,7 @@ if MOD_SDORM_MONGO
 else
 	rm -rf ${prefix}/web/te-benchmark-um-pq
 	rm -rf ${prefix}/web/te-benchmark-um-pq-async
+	rm -rf ${prefix}/web/te-benchmark-um-pq-async-qw
 endif
 	cp -Rf script/* ${prefix}/
 	-rm -f ${prefix}/lib/*.la ${prefix}/lib/*.lai
diff --git a/configure.ac b/configure.ac
index 9e3a2759f..478d2a1c6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1301,6 +1301,7 @@ AC_CONFIG_FILES([web/default/src/autotools/Makefile
 				web/te-benchmark-um/src/autotools/Makefile
 				web/te-benchmark-um-pq/src/autotools/Makefile
 				web/te-benchmark-um-pq-async/src/autotools/Makefile
+				web/te-benchmark-um-pq-async-qw/src/autotools/Makefile
 				web/te-benchmark-um-mgr/src/autotools/Makefile])
 AC_OUTPUT
 
diff --git a/docker/compose-techempower-tests/ffead-cpp/install_ffead-cpp-framework-forsql.sh b/docker/compose-techempower-tests/ffead-cpp/install_ffead-cpp-framework-forsql.sh
index ddc6e3cd6..d3f4d23f2 100644
--- a/docker/compose-techempower-tests/ffead-cpp/install_ffead-cpp-framework-forsql.sh
+++ b/docker/compose-techempower-tests/ffead-cpp/install_ffead-cpp-framework-forsql.sh
@@ -45,6 +45,7 @@ sed -i 's|localhost|db|g' web/te-benchmark-um/config/sdormpostgresql.xml
 sed -i 's|localhost|db|g' web/te-benchmark-um-mgr/config/sdorm.xml
 sed -i 's|localhost|db|g' web/te-benchmark-um-pq/config/sdorm.xml
 sed -i 's|localhost|db|g' web/te-benchmark-um-pq-async/config/sdorm.xml
+sed -i 's|localhost|db|g' web/te-benchmark-um-pq-async-qw/config/sdorm.xml
 sed -i 's|127.0.0.1|db|g' resources/sample-odbcinst.ini
 sed -i 's|127.0.0.1|db|g' resources/sample-odbc.ini
 sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/default)||g' CMakeLists.txt
diff --git a/docker/compose-techempower-tests/ffead-cpp/install_ffead-cpp-framework.sh b/docker/compose-techempower-tests/ffead-cpp/install_ffead-cpp-framework.sh
index aa8ae2669..bef43e12e 100644
--- a/docker/compose-techempower-tests/ffead-cpp/install_ffead-cpp-framework.sh
+++ b/docker/compose-techempower-tests/ffead-cpp/install_ffead-cpp-framework.sh
@@ -46,6 +46,7 @@ sed -i 's|localhost|db|g' web/te-benchmark-um/config/sdormpostgresql.xml
 sed -i 's|localhost|db|g' web/te-benchmark-um-mgr/config/sdorm.xml
 sed -i 's|localhost|db|g' web/te-benchmark-um-pq/config/sdorm.xml
 sed -i 's|localhost|db|g' web/te-benchmark-um-pq-async/config/sdorm.xml
+sed -i 's|localhost|db|g' web/te-benchmark-um-pq-async-qw/config/sdorm.xml
 sed -i 's|127.0.0.1|db|g' resources/sample-odbcinst.ini
 sed -i 's|127.0.0.1|db|g' resources/sample-odbc.ini
 sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/default)||g' CMakeLists.txt
diff --git a/docker/techempower-config/benchmark_config.json b/docker/techempower-config/benchmark_config.json
index b3f55ec6b..62603327d 100644
--- a/docker/techempower-config/benchmark_config.json
+++ b/docker/techempower-config/benchmark_config.json
@@ -1,27 +1,6 @@
 {
 	"framework": "ffead-cpp",
 	"tests": [{
-		"p3-b": {
-			"db_url": "/te-benchmark-um-pq-async/db",
-			"query_url": "/te-benchmark-um-pq-async/queries?queries=",
-			"fortune_url": "/te-benchmark-um-pq-async/fortunes",
-			"update_url": "/te-benchmark-um-pq-async/updates?queries=",
-			"port": 8080,
-			"approach": "Realistic",
-			"classification": "Fullstack",
-			"database": "postgres",
-			"framework": "ffead-cpp",
-			"language": "C++",
-			"orm": "Raw",
-			"platform": "None",
-			"webserver": "ffead-cpp",
-			"os": "Linux",
-			"database_os": "Linux",
-			"display_name": "ffead-cpp-p3-b",
-			"notes": "async memory libpq batch patch profiled",
-			"versus": "",
-			"tags": []
-		},
 		"v-picov": {
 			"json_url": "/te-benchmark-um-pq/json",
 			"plaintext_url": "/plaintext",
@@ -41,7 +20,7 @@
 			"webserver": "picov",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-v",
+			"display_name": "ffead-cpp [v]",
 			"notes": "",
 			"versus": "",
 			"tags": []
@@ -65,7 +44,7 @@
 			"webserver": "picov",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-v-prof",
+			"display_name": "ffead-cpp [v-prof]",
 			"notes": "",
 			"versus": "",
 			"tags": []
@@ -89,7 +68,7 @@
 			"webserver": "picov",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-v-prof-b",
+			"display_name": "ffead-cpp [v-prof-b]",
 			"notes": "",
 			"versus": "",
 			"tags": []
@@ -110,7 +89,7 @@
 			"webserver": "ffead-cpp",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-pg-raw-prof",
+			"display_name": "ffead-cpp [pg-raw-prof]",
 			"notes": "memory profiled",
 			"versus": "",
 			"tags": []
@@ -152,12 +131,12 @@
 			"webserver": "ffead-cpp",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-pg-raw-async-prof",
+			"display_name": "ffead-cpp [pg-raw-async-prof]",
 			"notes": "async memory profiled",
 			"versus": "",
 			"tags": []
 		},
-		"postgresql-raw-async-pool-profiled": {
+		"postgresql-raw-async-clibpqb-profiled": {
 			"db_url": "/te-benchmark-um-pq-async/db",
 			"query_url": "/te-benchmark-um-pq-async/queries?queries=",
 			"fortune_url": "/te-benchmark-um-pq-async/fortunes",
@@ -173,8 +152,8 @@
 			"webserver": "ffead-cpp",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-pg-raw-async-prof-pool",
-			"notes": "async memory profiled",
+			"display_name": "ffead-cpp [pg-raw-async-prof-b]",
+			"notes": "async memory libpq batch patch profiled",
 			"versus": "",
 			"tags": []
 		},
@@ -192,16 +171,16 @@
 			"webserver": "ffead-cpp",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-pg-raw-async-prof-pool-m",
+			"display_name": "ffead-cpp [pg-raw-async-prof-pool-m]",
 			"notes": "async memory profiled",
 			"versus": "",
 			"tags": []
 		},
-		"postgresql-raw-async-clibpqb-profiled": {
-			"db_url": "/te-benchmark-um-pq-async/db",
-			"query_url": "/te-benchmark-um-pq-async/queries?queries=",
-			"fortune_url": "/te-benchmark-um-pq-async/fortunes",
-			"update_url": "/te-benchmark-um-pq-async/updates?queries=",
+		"postgresql-raw-async-qw-profiled": {
+			"db_url": "/te-benchmark-um-pq-async-qw/db",
+			"query_url": "/te-benchmark-um-pq-async-qw/queries?queries=",
+			"fortune_url": "/te-benchmark-um-pq-async-qw/fortunes",
+			"update_url": "/te-benchmark-um-pq-async-qw/updates?queries=",
 			"port": 8080,
 			"approach": "Realistic",
 			"classification": "Fullstack",
@@ -213,16 +192,16 @@
 			"webserver": "ffead-cpp",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-pg-raw-async-prof-b",
-			"notes": "async memory libpq batch patch profiled",
+			"display_name": "ffead-cpp [pg-raw-async-qw-prof]",
+			"notes": "async memory profiled",
 			"versus": "",
 			"tags": []
 		},
-		"postgresql-raw-async-clibpqb-pool-profiled": {
-			"db_url": "/te-benchmark-um-pq-async/db",
-			"query_url": "/te-benchmark-um-pq-async/queries?queries=",
-			"fortune_url": "/te-benchmark-um-pq-async/fortunes",
-			"update_url": "/te-benchmark-um-pq-async/updates?queries=",
+		"postgresql-raw-async-qw-clibpqb-profiled": {
+			"db_url": "/te-benchmark-um-pq-async-qw/db",
+			"query_url": "/te-benchmark-um-pq-async-qw/queries?queries=",
+			"fortune_url": "/te-benchmark-um-pq-async-qw/fortunes",
+			"update_url": "/te-benchmark-um-pq-async-qw/updates?queries=",
 			"port": 8080,
 			"approach": "Realistic",
 			"classification": "Fullstack",
@@ -234,10 +213,29 @@
 			"webserver": "ffead-cpp",
 			"os": "Linux",
 			"database_os": "Linux",
-			"display_name": "ffead-cpp-pg-raw-async-prof-b-pool",
+			"display_name": "ffead-cpp [pg-raw-async-qw-prof-b]",
 			"notes": "async memory libpq batch patch profiled",
 			"versus": "",
 			"tags": []
+		},
+		"postgresql-raw-async-qw-pool-profiled-m": {
+			"query_url": "/te-benchmark-um-pq-async-qw/queriem?queries=",
+			"update_url": "/te-benchmark-um-pq-async-qw/updatem?queries=",
+			"port": 8080,
+			"approach": "Realistic",
+			"classification": "Fullstack",
+			"database": "postgres",
+			"framework": "ffead-cpp",
+			"language": "C++",
+			"orm": "Raw",
+			"platform": "None",
+			"webserver": "ffead-cpp",
+			"os": "Linux",
+			"database_os": "Linux",
+			"display_name": "ffead-cpp [pg-raw-async-qw-prof-pool-m]",
+			"notes": "async memory profiled",
+			"versus": "",
+			"tags": []
 		}
 	}]
-}
\ No newline at end of file
+}
diff --git a/docker/techempower-config/ffead-cpp-base-debug.dockerfile b/docker/techempower-config/ffead-cpp-base-debug.dockerfile
index b1927892d..2e9b546b2 100644
--- a/docker/techempower-config/ffead-cpp-base-debug.dockerfile
+++ b/docker/techempower-config/ffead-cpp-base-debug.dockerfile
@@ -12,7 +12,8 @@ RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selectio
 #COPY te-benchmark-um/ /installs/te-benchmark-um/
 #COPY te-benchmark-um-pq/ /installs/te-benchmark-um-pq/
 #COPY te-benchmark-um-mgr/ /installs/te-benchmark-um-mgr/
-#COPY te-benchmark-um-mgr/ /installs/te-benchmark-um-pq-async/
+#COPY te-benchmark-um-pq-async/ /installs/te-benchmark-um-pq-async/
+#COPY te-benchmark-um-pq-async-qw/ /installs/te-benchmark-um-pq-async-qw/
 
 WORKDIR ${IROOT}
 
diff --git a/docker/techempower-config/ffead-cpp-base.dockerfile b/docker/techempower-config/ffead-cpp-base.dockerfile
index 64f5bf8e4..8136d589f 100644
--- a/docker/techempower-config/ffead-cpp-base.dockerfile
+++ b/docker/techempower-config/ffead-cpp-base.dockerfile
@@ -12,7 +12,8 @@ RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selectio
 #COPY te-benchmark-um/ /installs/te-benchmark-um/
 #COPY te-benchmark-um-pq/ /installs/te-benchmark-um-pq/
 #COPY te-benchmark-um-mgr/ /installs/te-benchmark-um-mgr/
-#COPY te-benchmark-um-mgr/ /installs/te-benchmark-um-pq-async/
+#COPY te-benchmark-um-pq-async/ /installs/te-benchmark-um-pq-async/
+#COPY te-benchmark-um-pq-async-qw/ /installs/te-benchmark-um-pq-async-qw/
 
 WORKDIR ${IROOT}
 
diff --git a/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-clibpqb-profiled.dockerfile b/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-clibpqb-profiled.dockerfile
new file mode 100644
index 000000000..39fa5ceda
--- /dev/null
+++ b/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-clibpqb-profiled.dockerfile
@@ -0,0 +1,7 @@
+FROM sumeetchhetri/ffead-cpp-sql-raw-async-clibpqb-profiled-base:6.0
+
+ENV IROOT=/installs
+
+WORKDIR /
+
+CMD ./run_ffead.sh ffead-cpp-6.0-sql emb postgresql-raw-async-qw memory
diff --git a/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-pool-profiled-m.dockerfile b/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-pool-profiled-m.dockerfile
new file mode 100644
index 000000000..1c81c0d9e
--- /dev/null
+++ b/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-pool-profiled-m.dockerfile
@@ -0,0 +1,7 @@
+FROM sumeetchhetri/ffead-cpp-sql-raw-async-pool-profiled-base:6.0
+
+ENV IROOT=/installs
+
+WORKDIR /
+
+CMD ./run_ffead.sh ffead-cpp-6.0-sql emb postgresql-raw-async-qw memory
diff --git a/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-profiled.dockerfile b/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-profiled.dockerfile
new file mode 100644
index 000000000..fc1de0736
--- /dev/null
+++ b/docker/techempower-config/ffead-cpp-postgresql-raw-async-qw-profiled.dockerfile
@@ -0,0 +1,7 @@
+FROM sumeetchhetri/ffead-cpp-sql-raw-async-profiled-base:6.0
+
+ENV IROOT=/installs
+
+WORKDIR /
+
+CMD ./run_ffead.sh ffead-cpp-6.0-sql emb postgresql-raw-async-qw memory
diff --git a/docker/techempower-config/ffead-cpp-seastar-base.dockerfile b/docker/techempower-config/ffead-cpp-seastar-base.dockerfile
index 96d27508f..3cc136ff6 100644
--- a/docker/techempower-config/ffead-cpp-seastar-base.dockerfile
+++ b/docker/techempower-config/ffead-cpp-seastar-base.dockerfile
@@ -10,6 +10,7 @@ RUN rm -f /usr/local/lib/libffead-* /usr/local/lib/libte_benc* /usr/local/lib/li
 	ln -s ${IROOT}/ffead-cpp-6.0/lib/libte-benchmark-um-pq.so /usr/local/lib/libte-benchmark-um-pq.so && \
 	ln -s ${IROOT}/ffead-cpp-6.0/lib/libte-benchmark-um-mgr.so /usr/local/lib/libte-benchmark-um-mgr.so && \
 	ln -s ${IROOT}/ffead-cpp-6.0/lib/libte-benchmark-um-pq-async.so /usr/local/lib/libte-benchmark-um-pq-async.so && \
+	ln -s ${IROOT}/ffead-cpp-6.0/lib/libte-benchmark-um-pq-async-qw.so /usr/local/lib/libte-benchmark-um-pq-async-qw.so && \
 	ln -s ${IROOT}/ffead-cpp-6.0/lib/libffead-modules.so /usr/local/lib/libffead-modules.so && \
 	ln -s ${IROOT}/ffead-cpp-6.0/lib/libffead-framework.so /usr/local/lib/libffead-framework.so && \
 	ln -s ${IROOT}/ffead-cpp-6.0/lib/libinter.so /usr/local/lib/libinter.so && \
diff --git a/docker/techempower-config/ffead-cpp-sql-raw-async-clibpqb-pool-profiled-base.dockerfile b/docker/techempower-config/ffead-cpp-sql-raw-async-clibpqb-pool-profiled-base.dockerfile
index d02b57a97..7f3cf59bd 100644
--- a/docker/techempower-config/ffead-cpp-sql-raw-async-clibpqb-pool-profiled-base.dockerfile
+++ b/docker/techempower-config/ffead-cpp-sql-raw-async-clibpqb-pool-profiled-base.dockerfile
@@ -83,11 +83,6 @@ COPY sql-profiled-util.sh ${IROOT}/
 RUN chmod 755 ${IROOT}/sql-profiled-util.sh
 RUN ./sql-profiled-util.sh batch clang async pool
 
-#COPY TeBkUmLpqAsync.cpp ${IROOT}/ffead-cpp-src/web/te-benchmark-um-pq-async/src/
-#COPY TeBkUmLpqAsync.h ${IROOT}/ffead-cpp-src/web/te-benchmark-um-pq-async/include/
-#COPY LibpqDataSourceImpl.cpp ${IROOT}/ffead-cpp-src/src/modules/sdorm/sql/libpq/
-#COPY LibpqDataSourceImpl.h ${IROOT}/ffead-cpp-src/src/modules/sdorm/sql/libpq/
-
 COPY sql-async-profiled-install-clang.sh install_ffead-cpp-sql-raw-profiled.sh ${IROOT}/
 RUN chmod 755 ${IROOT}/sql-async-profiled-install-clang.sh ${IROOT}/install_ffead-cpp-sql-raw-profiled.sh
 RUN ./sql-async-profiled-install-clang.sh batch
diff --git a/docker/techempower-config/ffead-cpp-sql-raw-async-clibpqb-profiled-base.dockerfile b/docker/techempower-config/ffead-cpp-sql-raw-async-clibpqb-profiled-base.dockerfile
index 589f89e57..38c674d7d 100644
--- a/docker/techempower-config/ffead-cpp-sql-raw-async-clibpqb-profiled-base.dockerfile
+++ b/docker/techempower-config/ffead-cpp-sql-raw-async-clibpqb-profiled-base.dockerfile
@@ -83,11 +83,6 @@ COPY sql-profiled-util.sh ${IROOT}/
 RUN chmod 755 ${IROOT}/sql-profiled-util.sh
 RUN ./sql-profiled-util.sh batch clang async
 
-#COPY TeBkUmLpqAsync.cpp ${IROOT}/ffead-cpp-src/web/te-benchmark-um-pq-async/src/
-#COPY TeBkUmLpqAsync.h ${IROOT}/ffead-cpp-src/web/te-benchmark-um-pq-async/include/
-#COPY LibpqDataSourceImpl.cpp ${IROOT}/ffead-cpp-src/src/modules/sdorm/sql/libpq/
-#COPY LibpqDataSourceImpl.h ${IROOT}/ffead-cpp-src/src/modules/sdorm/sql/libpq/
-
 COPY sql-async-profiled-install-clang.sh install_ffead-cpp-sql-raw-profiled.sh ${IROOT}/
 RUN chmod 755 ${IROOT}/sql-async-profiled-install-clang.sh ${IROOT}/install_ffead-cpp-sql-raw-profiled.sh
 RUN ./sql-async-profiled-install-clang.sh batch
diff --git a/docker/techempower-config/install_ffead-cpp-framework.sh b/docker/techempower-config/install_ffead-cpp-framework.sh
index 5c0baafa3..0ac91a59d 100644
--- a/docker/techempower-config/install_ffead-cpp-framework.sh
+++ b/docker/techempower-config/install_ffead-cpp-framework.sh
@@ -23,11 +23,13 @@ chmod 755 *.sh resources/*.sh rtdcf/autotools/*.sh
 #rm -rf web/te-benchmark-um-pq
 #rm -rf web/te-benchmark-um-mgr
 #rm -rf web/te-benchmark-um-pq-async
+#rm -rf web/te-benchmark-um-pq-async-qw
 mv ${IROOT}/server.sh script/
 #mv ${IROOT}/te-benchmark-um web/
 #mv ${IROOT}/te-benchmark-um-pq web/
 #mv ${IROOT}/te-benchmark-um-mgr web/
 #mv ${IROOT}/te-benchmark-um-pq-async web/
+#mv ${IROOT}/te-benchmark-um-pq-async-qw web/
 sed -i 's|THRD_PSIZ=6|THRD_PSIZ='${SERV_THREADS}'|g' resources/server.prop
 sed -i 's|W_THRD_PSIZ=2|W_THRD_PSIZ='${WRIT_THREADS}'|g' resources/server.prop
 sed -i 's|ENABLE_CRS=true|ENABLE_CRS=false|g' resources/server.prop
@@ -54,6 +56,7 @@ sed -i 's|localhost|tfb-database|g' web/te-benchmark-um/config/sdormpostgresql.x
 sed -i 's|localhost|tfb-database|g' web/te-benchmark-um-pq/config/sdorm.xml
 sed -i 's|localhost|tfb-database|g' web/te-benchmark-um-mgr/config/sdorm.xml
 sed -i 's|localhost|tfb-database|g' web/te-benchmark-um-pq-async/config/sdorm.xml
+sed -i 's|localhost|tfb-database|g' web/te-benchmark-um-pq-async-qw/config/sdorm.xml
 sed -i 's|127.0.0.1|tfb-database|g' resources/sample-odbcinst.ini
 sed -i 's|127.0.0.1|tfb-database|g' resources/sample-odbc.ini
 sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/default)||g' CMakeLists.txt
diff --git a/docker/techempower-config/install_ffead-cpp-sql-raw-profiled.sh b/docker/techempower-config/install_ffead-cpp-sql-raw-profiled.sh
index 5038e14e0..e6d4eb5a4 100644
--- a/docker/techempower-config/install_ffead-cpp-sql-raw-profiled.sh
+++ b/docker/techempower-config/install_ffead-cpp-sql-raw-profiled.sh
@@ -53,6 +53,18 @@ service postgresql start
 
 sed -i 's|EVH_SINGLE=false|EVH_SINGLE=true|g' resources/server.prop
 #sed -i 's|LOGGING_ENABLED=false|LOGGING_ENABLED=true|g' resources/server.prop
+
+if [ "$1" = "async" ]
+then
+	sed -i 's|REQUEST_HANDLER=RequestReaderHandler|REQUEST_HANDLER=RequestHandler2|g' resources/server.prop
+fi
+
+if [ "$1" = "async-qw" ]
+then
+	sed -i 's|REQUEST_HANDLER=RequestReaderHandler|REQUEST_HANDLER=RequestHandler2|g' resources/server.prop
+	sed -i 's|QUEUED_WRITES=false|QUEUED_WRITES=true|g' resources/server.prop
+fi
+
 nohup bash -c "./server.sh > ffead.log &"
 sleep 10
 echo "ffead-cpp with sql-raw support launched"
@@ -76,6 +88,7 @@ wrk -H 'Host: localhost' -H 'Accept: application/json,text/html;q=0.9,applicatio
 	-H 'Connection: keep-alive' --latency -d 5 -c 512 --timeout 8 -t 2 "http://localhost:8080/te-benchmark-um-pq${SUFFIX}/updatem?queries=20"
 wrk -H 'Host: localhost' -H 'Accept: application/json,text/html;q=0.9,application/xhtml+xml;q=0.9,application/xml;q=0.8,*/*;q=0.7' \
 	-H 'Connection: keep-alive' --latency -d 5 -c 512 --timeout 8 -t 2 "http://localhost:8080/te-benchmark-um-pq${SUFFIX}/update_?queries=20"
+
 echo "normal shutdown"
 rm -f serv.ctrl
 pkill ffead-cpp
diff --git a/docker/techempower-config/run_ffead.sh b/docker/techempower-config/run_ffead.sh
index 3e0bdddf3..cbe2165ba 100644
--- a/docker/techempower-config/run_ffead.sh
+++ b/docker/techempower-config/run_ffead.sh
@@ -11,6 +11,7 @@ ln -s ${FFEAD_CPP_PATH}/lib/libte-benchmark-um.so /usr/local/lib/libte-benchmark
 ln -s ${FFEAD_CPP_PATH}/lib/libte-benchmark-um-pq.so /usr/local/lib/libte-benchmark-um-pq.so
 ln -s ${FFEAD_CPP_PATH}/lib/libte-benchmark-um-mgr.so /usr/local/lib/libte-benchmark-um-mgr.so
 ln -s ${FFEAD_CPP_PATH}/lib/libte-benchmark-um-pq-async.so /usr/local/lib/libte-benchmark-um-pq-async.so
+ln -s ${FFEAD_CPP_PATH}/lib/libte-benchmark-um-pq-async-qw.so /usr/local/lib/libte-benchmark-um-pq-async-qw.so
 ln -s ${FFEAD_CPP_PATH}/lib/libffead-modules.so /usr/local/lib/libffead-modules.so
 ln -s ${FFEAD_CPP_PATH}/lib/libffead-framework.so /usr/local/lib/libffead-framework.so
 ln -s ${FFEAD_CPP_PATH}/lib/libinter.so /usr/local/lib/libinter.so
@@ -46,35 +47,40 @@ service memcached stop
 if [ "$3" = "mongo" ]
 then
 	WEB_DIR=$FFEAD_CPP_PATH/web/te-benchmark-um
-	rm -rf web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async
+	rm -rf web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 	cp -f ${WEB_DIR}/config/sdormmongo.xml ${WEB_DIR}/config/sdorm.xml
 elif [ "$3" = "mongo-raw" ]
 then
 	WEB_DIR=$FFEAD_CPP_PATH/web/te-benchmark-um-mgr
-	rm -rf web/te-benchmark-um web/te-benchmark-um-pq web/te-benchmark-um-pq-async
+	rm -rf web/te-benchmark-um web/te-benchmark-um-pq web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 elif [ "$3" = "mysql" ]
 then
 	WEB_DIR=$FFEAD_CPP_PATH/web/te-benchmark-um
-	rm -rf web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async
+	rm -rf web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 	cp -f ${WEB_DIR}/config/sdormmysql.xml ${WEB_DIR}/config/sdorm.xml
 elif [ "$3" = "postgresql" ]
 then
 	WEB_DIR=$FFEAD_CPP_PATH/web/te-benchmark-um
-	rm -rf web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async
+	rm -rf web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 	cp -f web/te-benchmark-um/config/sdormpostgresql.xml web/te-benchmark-um/config/sdorm.xml
 elif [ "$3" = "postgresql-raw" ]
 then
 	WEB_DIR=$FFEAD_CPP_PATH/web/te-benchmark-um-pq
-	rm -rf web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq-async
+	rm -rf web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 	sed -i 's|<async>true</async>|<async>false</async>|g' ${WEB_DIR}/config/sdorm.xml
 elif [ "$3" = "postgresql-raw-async" ]
 then
 	WEB_DIR=$FFEAD_CPP_PATH/web/te-benchmark-um-pq-async
-	rm -rf web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq
+	rm -rf web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async-qw
+	sed -i 's|<async>false</async>|<async>true</async>|g' ${WEB_DIR}/config/sdorm.xml
+elif [ "$3" = "postgresql-raw-async-qw" ]
+then
+	WEB_DIR=$FFEAD_CPP_PATH/web/te-benchmark-um-pq-async-qw
+	rm -rf web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async
 	sed -i 's|<async>false</async>|<async>true</async>|g' ${WEB_DIR}/config/sdorm.xml
 else
 	WEB_DIR=$FFEAD_CPP_PATH/web/te-benchmark-um
-	rm -rf web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async
+	rm -rf web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 fi
 
 if [ "$4" = "memory" ]
@@ -104,6 +110,11 @@ chmod 700 rtdcf/*
 if [ "$2" = "emb" ]
 then
 	sed -i 's|EVH_SINGLE=false|EVH_SINGLE=true|g' resources/server.prop
+	sed -i 's|REQUEST_HANDLER=RequestReaderHandler|REQUEST_HANDLER=RequestHandler2|g' $FFEAD_CPP_PATH/resources/server.prop
+	if [ "$3" = "postgresql-raw-async-qw" ]
+	then
+		sed -i 's|QUEUED_WRITES=false|QUEUED_WRITES=true|g' $FFEAD_CPP_PATH/resources/server.prop
+	fi
 	for i in $(seq 0 $(($(nproc --all)-1))); do
 		taskset -c $i ./ffead-cpp $FFEAD_CPP_PATH &
 	done
@@ -205,7 +216,6 @@ then
 	cd ${IROOT}
 	sed -i 's|"TeBkUmLpqRouter"|"TeBkUmLpqRouterPicoV"|g' ${WEB_DIR}/config/application.xml
 	sed -i 's|EVH_SINGLE=false|EVH_SINGLE=true|g' $FFEAD_CPP_PATH/resources/server.prop
-	sed -i 's|REQUEST_HANDLER=RequestReaderHandler|REQUEST_HANDLER=RequestHandler2|g' $FFEAD_CPP_PATH/resources/server.prop
 	for i in $(seq 0 $(($(nproc --all)-1))); do
 		taskset -c $i ./main --server_dir=$FFEAD_CPP_PATH --server_port=8080 &
 	done
diff --git a/docker/techempower-config/sql-async-profiled-install-clang-dbg.sh b/docker/techempower-config/sql-async-profiled-install-clang-dbg.sh
index 3b978e368..79eb5ea28 100644
--- a/docker/techempower-config/sql-async-profiled-install-clang-dbg.sh
+++ b/docker/techempower-config/sql-async-profiled-install-clang-dbg.sh
@@ -15,7 +15,9 @@ sed -i 's|cmake |CC=/usr/bin/clang CXX=/usr/bin/clang++ cmake |g' $IROOT/ffead-c
 #sed -i 's|-fprofile-instr-generate=/tmp/cprof.prof|-fprofile-instr-generate=/tmp/cprofdi.prof|g' $IROOT/ffead-cpp-sql-raw/rtdcf/CMakeLists.txt.template
 apt update -yqq && apt install -yqq vim gdb net-tools telnet iputils-ping
 ./install_ffead-cpp-sql-raw-profiled.sh async
+./install_ffead-cpp-sql-raw-profiled.sh async-qw
 
 #mv $IROOT/ffead-cpp-sql-raw $IROOT/ffead-cpp-6.0-sql
 #sed -i 's|localhost|tfb-database|g' $IROOT/ffead-cpp-6.0-sql/web/te-benchmark-um-pq-async/config/sdorm.xml
+#sed -i 's|localhost|tfb-database|g' $IROOT/ffead-cpp-6.0-sql/web/te-benchmark-um-pq-async-qw/config/sdorm.xml
 
diff --git a/docker/techempower-config/sql-async-profiled-install-clang.sh b/docker/techempower-config/sql-async-profiled-install-clang.sh
index 1e94a15b7..ab2f407c0 100644
--- a/docker/techempower-config/sql-async-profiled-install-clang.sh
+++ b/docker/techempower-config/sql-async-profiled-install-clang.sh
@@ -14,6 +14,7 @@ cd $IROOT/
 sed -i 's|cmake |CC=/usr/bin/clang CXX=/usr/bin/clang++ cmake |g' $IROOT/ffead-cpp-sql-raw/resources/rundyn-automake.sh
 #sed -i 's|-fprofile-instr-generate=/tmp/cprof.prof|-fprofile-instr-generate=/tmp/cprofdi.prof|g' $IROOT/ffead-cpp-sql-raw/rtdcf/CMakeLists.txt.template
 ./install_ffead-cpp-sql-raw-profiled.sh async
+./install_ffead-cpp-sql-raw-profiled.sh async-qw
 rm -rf $IROOT/ffead-cpp-sql-raw
 
 cd $IROOT/ffead-cpp-src
@@ -34,9 +35,11 @@ cd $IROOT/
 sed -i 's|cmake |CC=/usr/bin/clang CXX=/usr/bin/clang++ cmake |g' $IROOT/ffead-cpp-sql-raw/resources/rundyn-automake.sh
 #sed -i 's|-fprofile-instr-use=/tmp/cprof.pgo|-fprofile-instr-use=/tmp/cprofdi.pgo|g' $IROOT/ffead-cpp-sql-raw/rtdcf/CMakeLists.txt.template
 ./install_ffead-cpp-sql-raw-profiled.sh async
+./install_ffead-cpp-sql-raw-profiled.sh async-qw
 mv $IROOT/ffead-cpp-sql-raw $IROOT/ffead-cpp-6.0-sql
 
 sed -i 's|localhost|tfb-database|g' $IROOT/ffead-cpp-6.0-sql/web/te-benchmark-um-pq-async/config/sdorm.xml
+sed -i 's|localhost|tfb-database|g' $IROOT/ffead-cpp-6.0-sql/web/te-benchmark-um-pq-async-qw/config/sdorm.xml
 
 apt remove -yqq postgresql-13 postgresql-contrib-13 gnupg lsb-release
 apt autoremove -yqq
diff --git a/docker/techempower-config/sql-async-profiled-install.sh b/docker/techempower-config/sql-async-profiled-install.sh
index df1a2d184..3e50afcbf 100644
--- a/docker/techempower-config/sql-async-profiled-install.sh
+++ b/docker/techempower-config/sql-async-profiled-install.sh
@@ -13,6 +13,7 @@ service postgresql stop
 cd $IROOT/
 #sed -i 's|cmake |cmake -DCMAKE_EXE_LINKER_FLAGS="-fprofile-dir=/tmp/profile-data -fprofile-generate" -DCMAKE_CXX_FLAGS="-march=native -fprofile-dir=/tmp/profile-data -fprofile-generate" |g' $IROOT/ffead-cpp-sql-raw/resources/rundyn-automake.sh
 ./install_ffead-cpp-sql-raw-profiled.sh async
+./install_ffead-cpp-sql-raw-profiled.sh async-qw
 rm -rf $IROOT/ffead-cpp-sql-raw
 
 cd $IROOT/ffead-cpp-src
@@ -29,9 +30,11 @@ service postgresql stop
 cd $IROOT/
 #sed -i 's|cmake |CXXFLAGS="-march=native -fprofile-dir=/tmp/profile-data -fprofile-use -fprofile-correction" cmake |g' $IROOT/ffead-cpp-sql-raw/resources/rundyn-automake.sh
 ./install_ffead-cpp-sql-raw-profiled.sh async
+./install_ffead-cpp-sql-raw-profiled.sh async-qw
 mv $IROOT/ffead-cpp-sql-raw $IROOT/ffead-cpp-6.0-sql
 
 sed -i 's|localhost|tfb-database|g' $IROOT/ffead-cpp-6.0-sql/web/te-benchmark-um-pq-async/config/sdorm.xml
+sed -i 's|localhost|tfb-database|g' $IROOT/ffead-cpp-6.0-sql/web/te-benchmark-um-pq-async-qw/config/sdorm.xml
 
 apt remove -yqq postgresql-13 postgresql-contrib-13 gnupg lsb-release
 apt autoremove -yqq
diff --git a/docker/techempower-config/sql-profiled-util.sh b/docker/techempower-config/sql-profiled-util.sh
index f964d42c2..048092b92 100644
--- a/docker/techempower-config/sql-profiled-util.sh
+++ b/docker/techempower-config/sql-profiled-util.sh
@@ -63,15 +63,19 @@ then
 	sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq)||g' CMakeLists.txt
 	sed -i 's|install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq/libte-benchmark-um-pq${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 	sed -i 's|tfb-database|localhost|g' $IROOT/ffead-cpp-src/web/te-benchmark-um-pq-async/config/sdorm.xml
+	sed -i 's|tfb-database|localhost|g' $IROOT/ffead-cpp-src/web/te-benchmark-um-pq-async-qw/config/sdorm.xml
 	rm -rf web/te-benchmark-um-pq
 	if [ "$4" = "pool" ]
 	then
 		sed -i 's|"TeBkUmLpqAsyncRouter"|"TeBkUmLpqAsyncRouterPooled"|g' $IROOT/ffead-cpp-src/web/te-benchmark-um-pq-async/config/application.xml
 		sed -i 's|TeBkUmLpqAsyncRouter|TeBkUmLpqAsyncRouterPooled|g' $IROOT/ffead-cpp-src/web/te-benchmark-um-pq-async/config/cachememory.xml
+		sed -i 's|"TeBkUmLpqQwAsyncRouter"|"TeBkUmLpqQwAsyncRouterPooled"|g' $IROOT/ffead-cpp-src/web/te-benchmark-um-pq-async-qw/config/application.xml
 	fi
 else
 	sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async)||g' CMakeLists.txt
 	sed -i 's|install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async/libte-benchmark-um-pq-async${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
+	sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async-qw)||g' CMakeLists.txt
+	sed -i 's|install(FILES ${PROJECT_BINARY_DIR}/web/te-benchmark-um-pq-async-qw/libte-benchmark-um-pq-async-qw${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 	sed -i 's|tfb-database|localhost|g' $IROOT/ffead-cpp-src/web/te-benchmark-um-pq/config/sdorm.xml
-	rm -rf web/te-benchmark-um-pq-async
+	rm -rf web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 fi
\ No newline at end of file
diff --git a/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest b/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest
index fe6857191..037394be5 100644
--- a/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest
+++ b/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest
@@ -212,6 +212,7 @@ RUN ninja install && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-mongo-raw
 
 WORKDIR /tmp/ffead-cpp-src/build
@@ -220,6 +221,7 @@ RUN ninja install && \
 	cp -f /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um/config/cacheredis.xml /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um/config/cache.xml && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-mongo-orm
 
@@ -229,6 +231,7 @@ RUN ninja install && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-sql-raw
 
 WORKDIR /tmp/ffead-cpp-src/build
@@ -241,6 +244,7 @@ RUN cp -f /tmp/ffead-cpp-src/web/te-benchmark-um/sql-src/TeBkUmWorldsql.h /tmp/f
 	cp /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/resources/sample-odbc.ini /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/resources/odbc.ini && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-sql-orm
 
diff --git a/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest-nb b/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest-nb
index 9dcdb08da..eb86b979a 100644
--- a/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest-nb
+++ b/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest-nb
@@ -197,6 +197,7 @@ RUN ninja install && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-mongo-raw
 
 WORKDIR /tmp/ffead-cpp-src/build
@@ -205,6 +206,7 @@ RUN ninja install && \
 	cp -f /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um/config/cacheredis.xml /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um/config/cache.xml && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-mongo-orm
 
@@ -214,6 +216,7 @@ RUN ninja install && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-sql-raw
 
 WORKDIR /tmp/ffead-cpp-src/build
@@ -226,6 +229,7 @@ RUN cp -f /tmp/ffead-cpp-src/web/te-benchmark-um/sql-src/TeBkUmWorldsql.h /tmp/f
 	cp /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/resources/sample-odbc.ini /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/resources/odbc.ini && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-sql-orm
 
diff --git a/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest_localhost b/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest_localhost
index fbeb18bfe..548bc6df3 100644
--- a/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest_localhost
+++ b/docker/test/DockerFile-UbuntuBionic-x64-ffead-cpp-fortest_localhost
@@ -92,6 +92,7 @@ RUN ninja install && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-mongo-raw
 
 WORKDIR /tmp/ffead-cpp-src/build
@@ -100,6 +101,7 @@ RUN ninja install && \
 	cp -f /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um/config/cacheredis.xml /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um/config/cache.xml && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-mongo-orm
 
@@ -109,6 +111,7 @@ RUN ninja install && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-sql-raw
 
 WORKDIR /tmp/ffead-cpp-src/build
@@ -121,6 +124,7 @@ RUN cp -f /tmp/ffead-cpp-src/web/te-benchmark-um/sql-src/TeBkUmWorldsql.h /tmp/f
 	cp /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/resources/sample-odbc.ini /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/resources/odbc.ini && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async && \
+	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-pq-async-qw && \
 	rm -rf /tmp/ffead-cpp-src/ffead-cpp-6.0-bin/web/te-benchmark-um-mgr && \
 	mv /tmp/ffead-cpp-src/ffead-cpp-6.0-bin /tmp/ffead-cpp-sql-orm
 
diff --git a/docker/test/mongodb/create.js b/docker/test/mongodb/create.js
index 31de0e8a8..dc1a1e6ca 100644
--- a/docker/test/mongodb/create.js
+++ b/docker/test/mongodb/create.js
@@ -1,4 +1,4 @@
-use hello_world
+db = db.getSiblingDB('hello_world')
 db.world.drop()
 for (var i = 1; i <= 10000; i++) {
   db.world.save( { _id: i, id: i, randomNumber: Math.min(Math.floor(Math.random() * 10000) + 1, 10000) })
diff --git a/docker/test/mysql/create.sql b/docker/test/mysql/create.sql
index ed15cb5ff..f1a5756d2 100644
--- a/docker/test/mysql/create.sql
+++ b/docker/test/mysql/create.sql
@@ -2,6 +2,15 @@
 # http://stackoverflow.com/questions/37719818/the-server-time-zone-value-aest-is-unrecognized-or-represents-more-than-one-ti
 SET GLOBAL time_zone = '+00:00';
 
+CREATE USER 'benchmarkdbuser'@'%' IDENTIFIED WITH mysql_native_password BY 'benchmarkdbpass';
+CREATE USER 'benchmarkdbuser'@'localhost' IDENTIFIED WITH mysql_native_password BY 'benchmarkdbpass';
+
+-- GitHub Actions/CI run the database server on the same system as the benchmarks.
+-- Because we set up MySQL with the skip-name-resolve option, the IP address 127.0.0.1 might not be resolved to localhost
+-- anymore. This does not seem to matter, as long as Unix sockets are being used (e.g. when setting up the docker image),
+-- because the host is set to be localhost implicitly, but it matters for local TCP connections.
+CREATE USER 'benchmarkdbuser'@'127.0.0.1' IDENTIFIED WITH mysql_native_password BY 'benchmarkdbpass';
+
 # modified from SO answer http://stackoverflow.com/questions/5125096/for-loop-in-mysql
 CREATE DATABASE hello_world;
 USE hello_world;
@@ -12,10 +21,9 @@ CREATE TABLE  world (
   PRIMARY KEY  (id)
 )
 ENGINE=INNODB;
-CREATE USER 'benchmarkdbuser'@'%' IDENTIFIED WITH mysql_native_password BY 'benchmarkdbpass';
-CREATE USER 'benchmarkdbuser'@'localhost' IDENTIFIED WITH mysql_native_password BY 'benchmarkdbpass';
 GRANT ALL PRIVILEGES ON hello_world.world TO 'benchmarkdbuser'@'%';
 GRANT ALL PRIVILEGES ON hello_world.world TO 'benchmarkdbuser'@'localhost';
+GRANT ALL PRIVILEGES ON hello_world.world TO 'benchmarkdbuser'@'127.0.0.1';
 
 DELIMITER #
 CREATE PROCEDURE load_data()
@@ -46,6 +54,7 @@ CREATE TABLE  fortune (
 ENGINE=INNODB;
 GRANT ALL PRIVILEGES ON hello_world.fortune TO 'benchmarkdbuser'@'%';
 GRANT ALL PRIVILEGES ON hello_world.fortune TO 'benchmarkdbuser'@'localhost';
+GRANT ALL PRIVILEGES ON hello_world.fortune TO 'benchmarkdbuser'@'127.0.0.1';
 
 INSERT INTO fortune (message) VALUES ('fortune: No such file or directory');
 INSERT INTO fortune (message) VALUES ('A computer scientist is someone who fixes things that aren''t broken.');
diff --git a/docker/test/postgresql/pg_hba.conf b/docker/test/postgresql/pg_hba.conf
index ca7334d74..ac4b3621d 100644
--- a/docker/test/postgresql/pg_hba.conf
+++ b/docker/test/postgresql/pg_hba.conf
@@ -97,4 +97,4 @@ host    all             all             ::1/128                 md5
 #local   replication     postgres                                peer
 #host    replication     postgres        127.0.0.1/32            md5
 #host    replication     postgres        ::1/128                 md5
-host	all		all		0.0.0.0/0		md5
\ No newline at end of file
+host	all		all		0.0.0.0/0		md5
diff --git a/docker/test/postgresql/pgdg.list b/docker/test/postgresql/pgdg.list
index 2fa46e28c..64b218835 100644
--- a/docker/test/postgresql/pgdg.list
+++ b/docker/test/postgresql/pgdg.list
@@ -1,2 +1,2 @@
 deb http://apt.postgresql.org/pub/repos/apt/ bionic-pgdg main
-deb-src http://apt.postgresql.org/pub/repos/apt/ bionic-pgdg main
\ No newline at end of file
+deb-src http://apt.postgresql.org/pub/repos/apt/ bionic-pgdg main
diff --git a/docker/test/postgresql/postgresql.conf b/docker/test/postgresql/postgresql.conf
index e6abe3799..5a5f2eb1e 100644
--- a/docker/test/postgresql/postgresql.conf
+++ b/docker/test/postgresql/postgresql.conf
@@ -40,9 +40,9 @@
 
 data_directory = '/ssd/postgresql'		# use data in another directory
 					# (change requires restart)
-hba_file = '/etc/postgresql/13/main/pg_hba.conf'	# host-based authentication file
+hba_file = '/etc/postgresql/14/main/pg_hba.conf'	# host-based authentication file
 					# (change requires restart)
-ident_file = '/etc/postgresql/13/main/pg_ident.conf'	# ident configuration file
+ident_file = '/etc/postgresql/14/main/pg_ident.conf'	# ident configuration file
 					# (change requires restart)
 
 # If external_pid_file is not explicitly set, no extra PID file is written.
@@ -574,4 +574,4 @@ max_pred_locks_per_transaction = 256	# min 10
 # CUSTOMIZED OPTIONS
 #------------------------------------------------------------------------------
 
-#custom_variable_classes = ''		# list of custom variable class names
\ No newline at end of file
+#custom_variable_classes = ''		# list of custom variable class names
diff --git a/docker/webrtc-peerjs/DockerFile-UbuntuBionic-x64-ffead-cpp b/docker/webrtc-peerjs/DockerFile-UbuntuBionic-x64-ffead-cpp
index 887134c85..eb36f8be1 100644
--- a/docker/webrtc-peerjs/DockerFile-UbuntuBionic-x64-ffead-cpp
+++ b/docker/webrtc-peerjs/DockerFile-UbuntuBionic-x64-ffead-cpp
@@ -28,7 +28,7 @@ RUN mv ffead-cpp-master ffead-cpp-src
 RUN rm -f master.zip
 WORKDIR /tmp/ffead-cpp-src
 
-RUN rm -rf web/default web/oauthApp web/flexApp web/markers web/te-benchmark web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async
+RUN rm -rf web/default web/oauthApp web/flexApp web/markers web/te-benchmark web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/default)||g' CMakeLists.txt
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/flexApp)||g' CMakeLists.txt
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/oauthApp)||g' CMakeLists.txt
@@ -38,6 +38,7 @@ RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um)||g' CM
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-mgr)||g' CMakeLists.txt
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq)||g' CMakeLists.txt
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async)||g' CMakeLists.txt
+RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async-qw)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/default/libdefault${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/flexApp/libflexApp${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/oauthApp/liboauthApp${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
@@ -47,6 +48,7 @@ RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um/libte-benc
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um-mgr/libte-benchmark-um-mgr${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq/libte-benchmark-um-pq${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async/libte-benchmark-um-pq-async${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
+RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async-qw/libte-benchmark-um-pq-async-qw${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|web/default/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/flexApp/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/oauthApp/src/autotools/Makefile||g' configure.ac
@@ -56,6 +58,7 @@ RUN sed -i 's|web/te-benchmark-um/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/te-benchmark-um-mgr/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/te-benchmark-um-pq/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/te-benchmark-um-pq-async/src/autotools/Makefile||g' configure.ac
+RUN sed -i 's|web/te-benchmark-um-pq-async-qw/src/autotools/Makefile||g' configure.ac
 
 RUN cmake -DSRV_EMB=on -DMOD_REDIS=on -DMOD_SDORM_MONGO=on .
 RUN make install -j4
diff --git a/docker/webrtc-peerjs/DockerFile-UbuntuBionic-x64-ffead-cpp_autoconf b/docker/webrtc-peerjs/DockerFile-UbuntuBionic-x64-ffead-cpp_autoconf
index 434fc581c..f128d928c 100644
--- a/docker/webrtc-peerjs/DockerFile-UbuntuBionic-x64-ffead-cpp_autoconf
+++ b/docker/webrtc-peerjs/DockerFile-UbuntuBionic-x64-ffead-cpp_autoconf
@@ -28,7 +28,7 @@ RUN mv ffead-cpp-master ffead-cpp-src
 RUN rm -f master.zip
 WORKDIR /tmp/ffead-cpp-src
 
-RUN rm -rf web/default web/oauthApp web/flexApp web/markers web/te-benchmark web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async
+RUN rm -rf web/default web/oauthApp web/flexApp web/markers web/te-benchmark web/te-benchmark-um web/te-benchmark-um-mgr web/te-benchmark-um-pq web/te-benchmark-um-pq-async web/te-benchmark-um-pq-async-qw
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/default)||g' CMakeLists.txt
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/flexApp)||g' CMakeLists.txt
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/oauthApp)||g' CMakeLists.txt
@@ -38,6 +38,7 @@ RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um)||g' CM
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-mgr)||g' CMakeLists.txt
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq)||g' CMakeLists.txt
 RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async)||g' CMakeLists.txt
+RUN sed -i 's|add_subdirectory(${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async-qw)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/default/libdefault${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/flexApp/libflexApp${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/oauthApp/liboauthApp${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
@@ -47,6 +48,7 @@ RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um/libte-benc
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um-mgr/libte-benchmark-um-mgr${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq/libte-benchmark-um-pq${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async/libte-benchmark-um-pq-async${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
+RUN sed -i 's|install(FILES ${PROJECT_SOURCE_DIR}/web/te-benchmark-um-pq-async-qw/libte-benchmark-um-pq-async-qw${LIB_EXT} DESTINATION ${PROJECT_NAME}-bin/lib)||g' CMakeLists.txt
 RUN sed -i 's|web/default/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/flexApp/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/oauthApp/src/autotools/Makefile||g' configure.ac
@@ -56,6 +58,7 @@ RUN sed -i 's|web/te-benchmark-um/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/te-benchmark-um-mgr/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/te-benchmark-um-pq/src/autotools/Makefile||g' configure.ac
 RUN sed -i 's|web/te-benchmark-um-pq-async/src/autotools/Makefile||g' configure.ac
+RUN sed -i 's|web/te-benchmark-um-pq-async-qw/src/autotools/Makefile||g' configure.ac
 
 RUN chmod +x autogen.sh
 RUN ./autogen.sh
diff --git a/meson.build b/meson.build
index a7ac844f8..7709e75a2 100644
--- a/meson.build
+++ b/meson.build
@@ -339,6 +339,7 @@ endif
 if get_option('MOD_SDORM_SQL') == true
 	subdir('web/te-benchmark-um-pq')
 	subdir('web/te-benchmark-um-pq-async')
+	subdir('web/te-benchmark-um-pq-async-qw')
 endif
 
 emb_includes = ['src/modules/common','src/modules/cache','src/modules/cache/memory','src/modules/cache/redis',
diff --git a/resources/server.prop b/resources/server.prop
index 2a8a664c8..120f72886 100644
--- a/resources/server.prop
+++ b/resources/server.prop
@@ -107,4 +107,4 @@ ENABLE_STATIC_RESP=true
 STATIC_PATH_RESP=/plaintext|text/plain|Hello, World!
 
 REQUEST_HANDLER=RequestReaderHandler
-
+QUEUED_WRITES=false
diff --git a/script/server.sh b/script/server.sh
index 0925e2fa6..18bd636ef 100644
--- a/script/server.sh
+++ b/script/server.sh
@@ -4,10 +4,10 @@ export MALLOC_CHECK_=0
 IS_OS_DARWIN=`uname|tr '[A-Z]' '[a-z]'|awk 'index($0,"darwin") != 0 {print "darwin"}'`
 IS_BSD=`uname|tr '[A-Z]' '[a-z]'|awk 'index($0,"bsd") != 0 {print "bsd"}'`
 if [ "$IS_OS_DARWIN" != "" ]; then
-	alias nproc="sysctl -n hw.ncpu"
+	#alias nproc="sysctl -n hw.ncpu"
 	export FFEAD_CPP_PATH=`cd "$(dirname server.sh)" && ABSPATH=$(pwd) && cd -`
 elif [ "$IS_BSD" != "" ]; then
-	alias nproc="sysctl -n hw.ncpu"
+	#alias nproc="sysctl -n hw.ncpu"
 	export FFEAD_CPP_PATH=`echo $(dirname $(readlink -f $0))`
 else
 	export FFEAD_CPP_PATH=`echo $(dirname $(readlink -f $0))`
diff --git a/src/modules/common/AppDefines.h b/src/modules/common/AppDefines.h
index a13ba4d9b..04134750a 100644
--- a/src/modules/common/AppDefines.h
+++ b/src/modules/common/AppDefines.h
@@ -14,15 +14,15 @@
     limitations under the License.
 */
 
-#define BUILD_MESON 1
+#define BUILD_CMAKE 1
 
-/* #undef INC_MEMORYCACHE */
-#define INC_REDISCACHE 1
-#define INC_MEMCACHED 1
+#define INC_MEMORYCACHE 1
+/* #undef INC_REDISCACHE */
+/* #undef INC_MEMCACHED */
 #define INC_SDORM 1
 #define INC_SDORM_SQL 1
 #define INC_SDORM_MONGO 1
-#define INC_BINSER 1
+/* #undef INC_BINSER */
 #define INC_JOBS 1
 #define APPLE 1
 /* #undef MINGW */
@@ -39,26 +39,26 @@
 /* #undef USE_WIN_IOCP */
 /* #undef USE_IO_URING */
 
-#define HAVE_LIBPQ 1
+#define HAVE_LIBPQ /usr/local/lib/libpq.dylib
 #define HAVE_REGEX 1
-/* #undef HAVE_SSLINC */
-#define HAVE_SSLLIB 1
-#define HAVE_REDISINC 1
-#define HAVE_REDISLIB 1
-#define HAVE_MEMCACHEDINC 1
-#define HAVE_MEMCACHEDLIB 1
-#define HAVE_CURLLIB 1
+#define HAVE_SSLINC 1
+#define HAVE_SSLLIB /usr/local/opt/openssl/lib/libssl.dylib
+/* #undef HAVE_REDISINC */
+/* #undef HAVE_REDISLIB */
+/* #undef HAVE_MEMCACHEDINC */
+/* #undef HAVE_MEMCACHEDLIB */
+#define HAVE_CURLLIB /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX12.0.sdk/usr/lib/libcurl.tbd
 #define HAVE_EXECINFOINC 1
-/* #undef HAVE_UUIDINC */
+#define HAVE_UUIDINC 1
 /* #undef HAVE_OSSPUUIDINC */
 /* #undef HAVE_OSSPUUIDINC_2 */
 /* #undef HAVE_BSDUUIDINC */
 #define HAVE_SQLINC 1
-#define HAVE_ODBCLIB 1
+#define HAVE_ODBCLIB /usr/local/lib/libodbc.dylib
 #define HAVE_MONGOINC 1
-#define HAVE_MONGOCLIB 1
+#define HAVE_MONGOCLIB /usr/local/lib/libmongoc-1.0.dylib
 #define HAVE_BSONINC 1
-#define HAVE_BSONLIB 1
+#define HAVE_BSONLIB /usr/local/lib/libbson-1.0.dylib
 #define INC_JOBS 1
 /* #undef OS_BSD */
 /* #undef OS_SOLARIS */
@@ -78,8 +78,8 @@
 /* #undef HAVE_LIBPQ_BATCH */
 /* #undef HAVE_LIBPQ_PIPELINE */
 /* #undef IS_SENDFILE */
-#define HAVE_RAPID_JSON 1
-#define HAVE_PUGI_XML 1
+/* #undef HAVE_RAPID_JSON */
+/* #undef HAVE_PUGI_XML */
 /* #undef HAVE_SYSINFO */
 
 #ifdef HAVE_ODBCLIB
@@ -91,6 +91,7 @@
 #define INC_DVIEW 1
 #define INC_DCP 1
 #define INC_XMLSER 1
+#define BUILD_CMAKE 1
 
 #ifdef APPLE
 #define OS_DARWIN 1
diff --git a/src/modules/common/blockingconcurrentqueue.h b/src/modules/common/blockingconcurrentqueue.h
index 0d66284fe..66579b6ca 100644
--- a/src/modules/common/blockingconcurrentqueue.h
+++ b/src/modules/common/blockingconcurrentqueue.h
@@ -1,419 +1,23 @@
 // Provides an efficient blocking version of moodycamel::ConcurrentQueue.
-// ©2015-2016 Cameron Desrochers. Distributed under the terms of the simplified
+// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified
 // BSD license, available at the top of concurrentqueue.h.
+// Also dual-licensed under the Boost Software License (see LICENSE.md)
 // Uses Jeff Preshing's semaphore implementation (under the terms of its
-// separate zlib license, embedded below).
+// separate zlib license, see lightweightsemaphore.h).
 
 #pragma once
 
 #include "concurrentqueue.h"
+#include "lightweightsemaphore.h"
+
 #include <type_traits>
 #include <cerrno>
 #include <memory>
 #include <chrono>
 #include <ctime>
 
-#if defined(_WIN32)
-// Avoid including windows.h in a header; we only need a handful of
-// items, so we'll redeclare them here (this is relatively safe since
-// the API generally has to remain stable between Windows versions).
-// I know this is an ugly hack but it still beats polluting the global
-// namespace with thousands of generic names or adding a .cpp for nothing.
-extern "C" {
-	struct _SECURITY_ATTRIBUTES;
-	__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
-	__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
-	__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
-	__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
-}
-#elif defined(__MACH__)
-#include <mach/mach.h>
-#elif defined(__unix__)
-#include <semaphore.h>
-#endif
-
 namespace moodycamel
 {
-namespace details
-{
-	// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's
-	// portable + lightweight semaphore implementations, originally from
-	// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
-	// LICENSE:
-	// Copyright (c) 2015 Jeff Preshing
-	//
-	// This software is provided 'as-is', without any express or implied
-	// warranty. In no event will the authors be held liable for any damages
-	// arising from the use of this software.
-	//
-	// Permission is granted to anyone to use this software for any purpose,
-	// including commercial applications, and to alter it and redistribute it
-	// freely, subject to the following restrictions:
-	//
-	// 1. The origin of this software must not be misrepresented; you must not
-	//	claim that you wrote the original software. If you use this software
-	//	in a product, an acknowledgement in the product documentation would be
-	//	appreciated but is not required.
-	// 2. Altered source versions must be plainly marked as such, and must not be
-	//	misrepresented as being the original software.
-	// 3. This notice may not be removed or altered from any source distribution.
-	namespace mpmc_sema
-	{
-#if defined(_WIN32)
-		class Semaphore
-		{
-		private:
-			void* m_hSema;
-			
-			Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-			Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-
-		public:
-			Semaphore(int initialCount = 0)
-			{
-				assert(initialCount >= 0);
-				const long maxLong = 0x7fffffff;
-				m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
-			}
-
-			~Semaphore()
-			{
-				CloseHandle(m_hSema);
-			}
-
-			void wait()
-			{
-				const unsigned long infinite = 0xffffffff;
-				WaitForSingleObject(m_hSema, infinite);
-			}
-			
-			bool try_wait()
-			{
-				const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
-				return WaitForSingleObject(m_hSema, 0) != RC_WAIT_TIMEOUT;
-			}
-			
-			bool timed_wait(std::uint64_t usecs)
-			{
-				const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
-				return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) != RC_WAIT_TIMEOUT;
-			}
-
-			void signal(int count = 1)
-			{
-				ReleaseSemaphore(m_hSema, count, nullptr);
-			}
-		};
-#elif defined(__MACH__)
-		//---------------------------------------------------------
-		// Semaphore (Apple iOS and OSX)
-		// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
-		//---------------------------------------------------------
-		class Semaphore
-		{
-		private:
-			semaphore_t m_sema;
-
-			Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-			Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-
-		public:
-			Semaphore(int initialCount = 0)
-			{
-				assert(initialCount >= 0);
-				semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
-			}
-
-			~Semaphore()
-			{
-				semaphore_destroy(mach_task_self(), m_sema);
-			}
-
-			void wait()
-			{
-				semaphore_wait(m_sema);
-			}
-			
-			bool try_wait()
-			{
-				return timed_wait(0);
-			}
-			
-			bool timed_wait(std::uint64_t timeout_usecs)
-			{
-				mach_timespec_t ts;
-				ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);
-				ts.tv_nsec = (timeout_usecs % 1000000) * 1000;
-
-				// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
-				kern_return_t rc = semaphore_timedwait(m_sema, ts);
-
-				return rc != KERN_OPERATION_TIMED_OUT && rc != KERN_ABORTED;
-			}
-
-			void signal()
-			{
-				semaphore_signal(m_sema);
-			}
-
-			void signal(int count)
-			{
-				while (count-- > 0)
-				{
-					semaphore_signal(m_sema);
-				}
-			}
-		};
-#elif defined(__unix__)
-		//---------------------------------------------------------
-		// Semaphore (POSIX, Linux)
-		//---------------------------------------------------------
-		class Semaphore
-		{
-		private:
-			sem_t m_sema;
-
-			Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-			Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
-
-		public:
-			Semaphore(int initialCount = 0)
-			{
-				assert(initialCount >= 0);
-				sem_init(&m_sema, 0, initialCount);
-			}
-
-			~Semaphore()
-			{
-				sem_destroy(&m_sema);
-			}
-
-			void wait()
-			{
-				// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
-				int rc;
-				do {
-					rc = sem_wait(&m_sema);
-				} while (rc == -1 && errno == EINTR);
-			}
-
-			bool try_wait()
-			{
-				int rc;
-				do {
-					rc = sem_trywait(&m_sema);
-				} while (rc == -1 && errno == EINTR);
-				return !(rc == -1 && errno == EAGAIN);
-			}
-
-			bool timed_wait(std::uint64_t usecs)
-			{
-				struct timespec ts;
-				const int usecs_in_1_sec = 1000000;
-				const int nsecs_in_1_sec = 1000000000;
-				clock_gettime(CLOCK_REALTIME, &ts);
-				ts.tv_sec += usecs / usecs_in_1_sec;
-				ts.tv_nsec += (usecs % usecs_in_1_sec) * 1000;
-				// sem_timedwait bombs if you have more than 1e9 in tv_nsec
-				// so we have to clean things up before passing it in
-				if (ts.tv_nsec >= nsecs_in_1_sec) {
-					ts.tv_nsec -= nsecs_in_1_sec;
-					++ts.tv_sec;
-				}
-
-				int rc;
-				do {
-					rc = sem_timedwait(&m_sema, &ts);
-				} while (rc == -1 && errno == EINTR);
-				return !(rc == -1 && errno == ETIMEDOUT);
-			}
-
-			void signal()
-			{
-				sem_post(&m_sema);
-			}
-
-			void signal(int count)
-			{
-				while (count-- > 0)
-				{
-					sem_post(&m_sema);
-				}
-			}
-		};
-#else
-#error Unsupported platform! (No semaphore wrapper available)
-#endif
-
-		//---------------------------------------------------------
-		// LightweightSemaphore
-		//---------------------------------------------------------
-		class LightweightSemaphore
-		{
-		public:
-			typedef std::make_signed<std::size_t>::type ssize_t;
-
-		private:
-			std::atomic<ssize_t> m_count;
-			Semaphore m_sema;
-
-			bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)
-			{
-				ssize_t oldCount;
-				// Is there a better way to set the initial spin count?
-				// If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
-				// as threads start hitting the kernel semaphore.
-				int spin = 10000;
-				while (--spin >= 0)
-				{
-					oldCount = m_count.load(std::memory_order_relaxed);
-					if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
-						return true;
-					std::atomic_signal_fence(std::memory_order_acquire);	 // Prevent the compiler from collapsing the loop.
-				}
-				oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
-				if (oldCount > 0)
-					return true;
-				if (timeout_usecs < 0)
-				{
-					m_sema.wait();
-					return true;
-				}
-				if (m_sema.timed_wait((std::uint64_t)timeout_usecs))
-					return true;
-				// At this point, we've timed out waiting for the semaphore, but the
-				// count is still decremented indicating we may still be waiting on
-				// it. So we have to re-adjust the count, but only if the semaphore
-				// wasn't signaled enough times for us too since then. If it was, we
-				// need to release the semaphore too.
-				while (true)
-				{
-					oldCount = m_count.load(std::memory_order_acquire);
-					if (oldCount >= 0 && m_sema.try_wait())
-						return true;
-					if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed))
-						return false;
-				}
-			}
-
-			ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1)
-			{
-				assert(max > 0);
-				ssize_t oldCount;
-				int spin = 10000;
-				while (--spin >= 0)
-				{
-					oldCount = m_count.load(std::memory_order_relaxed);
-					if (oldCount > 0)
-					{
-						ssize_t newCount = oldCount > max ? oldCount - max : 0;
-						if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
-							return oldCount - newCount;
-					}
-					std::atomic_signal_fence(std::memory_order_acquire);
-				}
-				oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
-				if (oldCount <= 0)
-				{
-					if (timeout_usecs < 0)
-						m_sema.wait();
-					else if (!m_sema.timed_wait((std::uint64_t)timeout_usecs))
-					{
-						while (true)
-						{
-							oldCount = m_count.load(std::memory_order_acquire);
-							if (oldCount >= 0 && m_sema.try_wait())
-								break;
-							if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed))
-								return 0;
-						}
-					}
-				}
-				if (max > 1)
-					return 1 + tryWaitMany(max - 1);
-				return 1;
-			}
-
-		public:
-			LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
-			{
-				assert(initialCount >= 0);
-			}
-
-			bool tryWait()
-			{
-				ssize_t oldCount = m_count.load(std::memory_order_relaxed);
-				while (oldCount > 0)
-				{
-					if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
-						return true;
-				}
-				return false;
-			}
-
-			void wait()
-			{
-				if (!tryWait())
-					waitWithPartialSpinning();
-			}
-
-			bool wait(std::int64_t timeout_usecs)
-			{
-				return tryWait() || waitWithPartialSpinning(timeout_usecs);
-			}
-
-			// Acquires between 0 and (greedily) max, inclusive
-			ssize_t tryWaitMany(ssize_t max)
-			{
-				assert(max >= 0);
-				ssize_t oldCount = m_count.load(std::memory_order_relaxed);
-				while (oldCount > 0)
-				{
-					ssize_t newCount = oldCount > max ? oldCount - max : 0;
-					if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
-						return oldCount - newCount;
-				}
-				return 0;
-			}
-
-			// Acquires at least one, and (greedily) at most max
-			ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs)
-			{
-				assert(max >= 0);
-				ssize_t result = tryWaitMany(max);
-				if (result == 0 && max > 0)
-					result = waitManyWithPartialSpinning(max, timeout_usecs);
-				return result;
-			}
-			
-			ssize_t waitMany(ssize_t max)
-			{
-				ssize_t result = waitMany(max, -1);
-				assert(result > 0);
-				return result;
-			}
-
-			void signal(ssize_t count = 1)
-			{
-				assert(count >= 0);
-				ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
-				ssize_t toRelease = -oldCount < count ? -oldCount : count;
-				if (toRelease > 0)
-				{
-					m_sema.signal((int)toRelease);
-				}
-			}
-			
-			ssize_t availableApprox() const
-			{
-				ssize_t count = m_count.load(std::memory_order_relaxed);
-				return count > 0 ? count : 0;
-			}
-		};
-	}	// end namespace mpmc_sema
-}	// end namespace details
-
-
 // This is a blocking version of the queue. It has an almost identical interface to
 // the normal non-blocking version, with the addition of various wait_dequeue() methods
 // and the removal of producer-specific dequeue methods.
@@ -422,7 +26,7 @@ class BlockingConcurrentQueue
 {
 private:
 	typedef ::moodycamel::ConcurrentQueue<T, Traits> ConcurrentQueue;
-	typedef details::mpmc_sema::LightweightSemaphore LightweightSemaphore;
+	typedef ::moodycamel::LightweightSemaphore LightweightSemaphore;
 
 public:
 	typedef typename ConcurrentQueue::producer_token_t producer_token_t;
@@ -432,7 +36,7 @@ class BlockingConcurrentQueue
 	typedef typename ConcurrentQueue::size_t size_t;
 	typedef typename std::make_signed<size_t>::type ssize_t;
 	
-	static const size_t BLOCK_SIZE_ = ConcurrentQueue::BLOCK_SIZE_;
+	static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE;
 	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD;
 	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE;
 	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE;
@@ -451,8 +55,8 @@ class BlockingConcurrentQueue
 	// queue is fully constructed before it starts being used by other threads (this
 	// includes making the memory effects of construction visible, possibly with a
 	// memory barrier).
-	explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE_)
-		: inner(capacity), sema(create<LightweightSemaphore>(), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
+	explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE)
+		: inner(capacity), sema(create<LightweightSemaphore, ssize_t, int>(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
 	{
 		assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member");
 		if (!sema) {
@@ -461,7 +65,7 @@ class BlockingConcurrentQueue
 	}
 	
 	BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
-		: inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create<LightweightSemaphore>(), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
+		: inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create<LightweightSemaphore, ssize_t, int>(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
 	{
 		assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member");
 		if (!sema) {
@@ -754,7 +358,9 @@ class BlockingConcurrentQueue
 	template<typename U>
 	inline void wait_dequeue(U& item)
 	{
-		sema->wait();
+		while (!sema->wait()) {
+			continue;
+		}
 		while (!inner.try_dequeue(item)) {
 			continue;
 		}
@@ -795,7 +401,9 @@ class BlockingConcurrentQueue
 	template<typename U>
 	inline void wait_dequeue(consumer_token_t& token, U& item)
 	{
-		sema->wait();
+		while (!sema->wait()) {
+			continue;
+		}
 		while (!inner.try_dequeue(token, item)) {
 			continue;
 		}
@@ -943,18 +551,11 @@ class BlockingConcurrentQueue
 	
 
 private:
-	template<typename U>
-	static inline U* create()
-	{
-		auto p = (Traits::malloc)(sizeof(U));
-		return p != nullptr ? new (p) U : nullptr;
-	}
-	
-	template<typename U, typename A1>
-	static inline U* create(A1&& a1)
+	template<typename U, typename A1, typename A2>
+	static inline U* create(A1&& a1, A2&& a2)
 	{
-		auto p = (Traits::malloc)(sizeof(U));
-		return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
+		void* p = (Traits::malloc)(sizeof(U));
+		return p != nullptr ? new (p) U(std::forward<A1>(a1), std::forward<A2>(a2)) : nullptr;
 	}
 	
 	template<typename U>
diff --git a/src/modules/common/concurrentqueue.h b/src/modules/common/concurrentqueue.h
index d48a2e4a4..3ced1c7b2 100644
--- a/src/modules/common/concurrentqueue.h
+++ b/src/modules/common/concurrentqueue.h
@@ -5,7 +5,7 @@
 //    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue
 
 // Simplified BSD license:
-// Copyright (c) 2013-2016, Cameron Desrochers.
+// Copyright (c) 2013-2020, Cameron Desrochers.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -27,10 +27,11 @@
 // TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+// Also dual-licensed under the Boost Software License (see LICENSE.md)
 
 #pragma once
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
 // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
 // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings
 // upon assigning any computed values)
@@ -42,6 +43,13 @@
 #endif
 #endif
 
+#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
+// VS2019 with /W4 warns about constant conditional expressions, but it does not support
+// `if constexpr` unless /std:c++17 or higher is used, so we have no choice but to simply disable the warning
+#pragma warning(push)
+#pragma warning(disable: 4127)  // conditional expression is constant
+#endif
+
 #if defined(__APPLE__)
 #include "TargetConditionals.h"
 #endif
@@ -96,7 +104,7 @@ namespace moodycamel { namespace details {
 	static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;	// Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
 	static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
 } }
-#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
+#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL)
 namespace moodycamel { namespace details {
 	static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
 	
@@ -146,10 +154,21 @@ namespace moodycamel { namespace details {
 	typedef std::uintptr_t thread_id_t;
 	static const thread_id_t invalid_thread_id  = 0;		// Address can't be nullptr
 	static const thread_id_t invalid_thread_id2 = 1;		// Member accesses off a null pointer are also generally invalid. Plus it's not aligned.
-	static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
+	inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }
 } }
 #endif
 
+// Constexpr if
+#ifndef MOODYCAMEL_CONSTEXPR_IF
+#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L
+#define MOODYCAMEL_CONSTEXPR_IF if constexpr
+#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
+#else
+#define MOODYCAMEL_CONSTEXPR_IF if
+#define MOODYCAMEL_MAYBE_UNUSED
+#endif
+#endif
+
 // Exceptions
 #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
 #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
@@ -162,8 +181,8 @@ namespace moodycamel { namespace details {
 #define MOODYCAMEL_RETHROW throw
 #define MOODYCAMEL_THROW(expr) throw (expr)
 #else
-#define MOODYCAMEL_TRY if (true)
-#define MOODYCAMEL_CATCH(...) else if (false)
+#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true)
+#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false)
 #define MOODYCAMEL_RETHROW
 #define MOODYCAMEL_THROW(expr)
 #endif
@@ -214,6 +233,44 @@ namespace moodycamel { namespace details {
 #endif
 #endif
 
+namespace moodycamel { namespace details {
+#ifndef MOODYCAMEL_ALIGNAS
+// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
+#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
+	template<int Align, typename T> struct Vs2013Aligned { };  // default, unsupported alignment
+	template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
+	template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
+	template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
+	template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
+	template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
+	template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
+	template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
+	template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
+	template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
+#else
+	template<typename T> struct identity { typedef T type; };
+#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
+#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
+#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
+#endif
+#endif
+} }
+
+
+// TSAN can falsely report races in lock-free code.  To enable TSAN to be used from projects that use this one,
+// we can apply per-function compile-time suppression.
+// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
+#define MOODYCAMEL_NO_TSAN
+#if defined(__has_feature)
+ #if __has_feature(thread_sanitizer)
+  #undef MOODYCAMEL_NO_TSAN
+  #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
+ #endif // TSAN
+#endif // TSAN
+
 // Compiler-specific likely/unlikely hints
 namespace moodycamel { namespace details {
 #if defined(__GNUC__)
@@ -282,7 +339,7 @@ struct ConcurrentQueueDefaultTraits
 	// but many producers, a smaller block size should be favoured. For few producers
 	// and/or many elements, a larger block size is preferred. A sane default
 	// is provided. Must be a power of 2.
-	static const size_t BLOCK_SIZE_ = 32;
+	static const size_t BLOCK_SIZE = 32;
 	
 	// For explicit producers (i.e. when using a producer token), the block is
 	// checked for being empty by iterating through a list of flags, one per element.
@@ -315,6 +372,12 @@ struct ConcurrentQueueDefaultTraits
 	// that this limit is enforced at the block level (for performance reasons), i.e.
 	// it's rounded up to the nearest block size.
 	static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;
+
+	// The number of times to spin before sleeping when waiting on a semaphore.
+	// Recommended values are on the order of 1000-10000 unless the number of
+	// consumer threads exceeds the number of idle cores (in which case try 0-100).
+	// Only affects instances of the BlockingConcurrentQueue.
+	static const int MAX_SEMA_SPINS = 10000;
 	
 	
 #ifndef MCDBGQ_USE_RELACY
@@ -689,7 +752,7 @@ class ConcurrentQueue
 	typedef typename Traits::index_t index_t;
 	typedef typename Traits::size_t size_t;
 	
-	static const size_t BLOCK_SIZE_ = static_cast<size_t>(Traits::BLOCK_SIZE_);
+	static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
 	static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
 	static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
 	static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
@@ -700,7 +763,7 @@ class ConcurrentQueue
 #pragma warning(disable: 4307)		// + integral constant overflow (that's what the ternary expression is for!)
 #pragma warning(disable: 4309)		// static_cast: Truncation of constant value
 #endif
-	static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE_) ? details::const_numeric_max<size_t>::value : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE_ - 1)) / BLOCK_SIZE_ * BLOCK_SIZE_);
+	static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max<size_t>::value : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE);
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
@@ -708,7 +771,7 @@ class ConcurrentQueue
 	static_assert(!std::numeric_limits<size_t>::is_signed && std::is_integral<size_t>::value, "Traits::size_t must be an unsigned integral type");
 	static_assert(!std::numeric_limits<index_t>::is_signed && std::is_integral<index_t>::value, "Traits::index_t must be an unsigned integral type");
 	static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t");
-	static_assert((BLOCK_SIZE_ > 1) && !(BLOCK_SIZE_ & (BLOCK_SIZE_ - 1)), "Traits::BLOCK_SIZE_ must be a power of 2 (and at least 2)");
+	static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
 	static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)");
 	static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
 	static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");
@@ -726,7 +789,7 @@ class ConcurrentQueue
 	// queue is fully constructed before it starts being used by other threads (this
 	// includes making the memory effects of construction visible, possibly with a
 	// memory barrier).
-	explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE_)
+	explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE)
 		: producerListTail(nullptr),
 		producerCount(0),
 		initialBlockPoolIndex(0),
@@ -735,7 +798,7 @@ class ConcurrentQueue
 	{
 		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
 		populate_initial_implicit_producer_hash();
-		populate_initial_block_list(capacity / BLOCK_SIZE_ + ((capacity & (BLOCK_SIZE_ - 1)) == 0 ? 0 : 1));
+		populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1));
 		
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
 		// Track all the producers using a fully-resolved typed list for
@@ -759,7 +822,7 @@ class ConcurrentQueue
 	{
 		implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
 		populate_initial_implicit_producer_hash();
-		size_t blocks = (((minCapacity + BLOCK_SIZE_ - 1) / BLOCK_SIZE_) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers);
+		size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers);
 		populate_initial_block_list(blocks);
 		
 #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
@@ -785,7 +848,7 @@ class ConcurrentQueue
 		}
 		
 		// Destroy implicit producer hash tables
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
 			auto hash = implicitProducerHash.load(std::memory_order_relaxed);
 			while (hash != nullptr) {
 				auto prev = hash->prev;
@@ -910,8 +973,8 @@ class ConcurrentQueue
 	// Thread-safe.
 	inline bool enqueue(T const& item)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue<CanAlloc>(item);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CanAlloc>(item);
 	}
 	
 	// Enqueues a single item (by moving it, if possible).
@@ -921,8 +984,8 @@ class ConcurrentQueue
 	// Thread-safe.
 	inline bool enqueue(T&& item)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue<CanAlloc>(std::move(item));
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CanAlloc>(std::move(item));
 	}
 	
 	// Enqueues a single item (by copying it) using an explicit producer token.
@@ -952,8 +1015,8 @@ class ConcurrentQueue
 	template<typename It>
 	bool enqueue_bulk(It itemFirst, size_t count)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
 	}
 	
 	// Enqueues several items using an explicit producer token.
@@ -975,8 +1038,8 @@ class ConcurrentQueue
 	// Thread-safe.
 	inline bool try_enqueue(T const& item)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue<CannotAlloc>(item);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CannotAlloc>(item);
 	}
 	
 	// Enqueues a single item (by moving it, if possible).
@@ -986,8 +1049,8 @@ class ConcurrentQueue
 	// Thread-safe.
 	inline bool try_enqueue(T&& item)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue<CannotAlloc>(std::move(item));
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue<CannotAlloc>(std::move(item));
 	}
 	
 	// Enqueues a single item (by copying it) using an explicit producer token.
@@ -1016,8 +1079,8 @@ class ConcurrentQueue
 	template<typename It>
 	bool try_enqueue_bulk(It itemFirst, size_t count)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
-		return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+		else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
 	}
 	
 	// Enqueues several items using an explicit producer token.
@@ -1374,7 +1437,7 @@ class ConcurrentQueue
 		
 		inline void add(N* node)
 		{
-#if MCDBGQ_NOLOCKFREE_FREELIST
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
 			debug::DebugLock lock(mutex);
 #endif		
 			// We know that the should-be-on-freelist bit is 0 at this point, so it's safe to
@@ -1388,7 +1451,7 @@ class ConcurrentQueue
 		
 		inline N* try_get()
 		{
-#if MCDBGQ_NOLOCKFREE_FREELIST
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
 			debug::DebugLock lock(mutex);
 #endif		
 			auto head = freeListHead.load(std::memory_order_acquire);
@@ -1460,7 +1523,7 @@ class ConcurrentQueue
 	static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
 	static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;
 		
-#if MCDBGQ_NOLOCKFREE_FREELIST
+#ifdef MCDBGQ_NOLOCKFREE_FREELIST
 		debug::DebugMutex mutex;
 #endif
 	};
@@ -1477,7 +1540,7 @@ class ConcurrentQueue
 		Block()
 			: next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true)
 		{
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM  // the owner member is declared only when this macro is defined
 			owner = nullptr;
 #endif
 		}
@@ -1485,9 +1548,9 @@ class ConcurrentQueue
 		template<InnerQueueContext context>
 		inline bool is_empty() const
 		{
-			if (context == explicit_context && BLOCK_SIZE_ <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
 				// Check flags
-				for (size_t i = 0; i < BLOCK_SIZE_; ++i) {
+				for (size_t i = 0; i < BLOCK_SIZE; ++i) {
 					if (!emptyFlags[i].load(std::memory_order_relaxed)) {
 						return false;
 					}
@@ -1499,42 +1562,42 @@ class ConcurrentQueue
 			}
 			else {
 				// Check counter
-				if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE_) {
+				if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) {
 					std::atomic_thread_fence(std::memory_order_acquire);
 					return true;
 				}
-				assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE_);
+				assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE);
 				return false;
 			}
 		}
 		
 		// Returns true if the block is now empty (does not apply in explicit context)
 		template<InnerQueueContext context>
-		inline bool set_empty(index_t i)
+		inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i)  // i is read only by the flag branch below; the counter branch ignores it
 		{
-			if (context == explicit_context && BLOCK_SIZE_ <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {  // explicit-context blocks at or below the threshold use one flag per slot
 				// Set flag
-				assert(!emptyFlags[BLOCK_SIZE_ - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE_ - 1))].load(std::memory_order_relaxed));
-				emptyFlags[BLOCK_SIZE_ - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE_ - 1))].store(true, std::memory_order_release);
+				assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(std::memory_order_relaxed));  // the slot must not already be flagged
+				emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, std::memory_order_release);  // flag position is mirrored: BLOCK_SIZE - 1 minus the masked index
 				return false;
 			}
 			else {
 				// Increment counter
 				auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release);
-				assert(prevVal < BLOCK_SIZE_);
-				return prevVal == BLOCK_SIZE_ - 1;
+				assert(prevVal < BLOCK_SIZE);
+				return prevVal == BLOCK_SIZE - 1;  // true only for the increment that brings the counter up to BLOCK_SIZE
 			}
 		}
 		
 		// Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).
 		// Returns true if the block is now empty (does not apply in explicit context).
 		template<InnerQueueContext context>
-		inline bool set_many_empty(index_t i, size_t count)
+		inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count)
 		{
-			if (context == explicit_context && BLOCK_SIZE_ <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
 				// Set flags
 				std::atomic_thread_fence(std::memory_order_release);
-				i = BLOCK_SIZE_ - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE_ - 1)) - count + 1;
+				i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1;
 				for (size_t j = 0; j != count; ++j) {
 					assert(!emptyFlags[i + j].load(std::memory_order_relaxed));
 					emptyFlags[i + j].store(true, std::memory_order_relaxed);
@@ -1544,32 +1607,32 @@ class ConcurrentQueue
 			else {
 				// Increment counter
 				auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release);
-				assert(prevVal + count <= BLOCK_SIZE_);
-				return prevVal + count == BLOCK_SIZE_;
+				assert(prevVal + count <= BLOCK_SIZE);
+				return prevVal + count == BLOCK_SIZE;
 			}
 		}
 		
 		template<InnerQueueContext context>
 		inline void set_all_empty()
 		{
-			if (context == explicit_context && BLOCK_SIZE_ <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {  // same flag-versus-counter condition that is_empty() tests
 				// Set all flags
-				for (size_t i = 0; i != BLOCK_SIZE_; ++i) {
+				for (size_t i = 0; i != BLOCK_SIZE; ++i) {
 					emptyFlags[i].store(true, std::memory_order_relaxed);
 				}
 			}
 			else {
 				// Reset counter
-				elementsCompletelyDequeued.store(BLOCK_SIZE_, std::memory_order_relaxed);
+				elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);  // BLOCK_SIZE is the counter value that is_empty() reports as empty
 			}
 		}
 		
 		template<InnerQueueContext context>
 		inline void reset_empty()
 		{
-			if (context == explicit_context && BLOCK_SIZE_ <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
+			MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
 				// Reset flags
-				for (size_t i = 0; i != BLOCK_SIZE_; ++i) {
+				for (size_t i = 0; i != BLOCK_SIZE; ++i) {
 					emptyFlags[i].store(false, std::memory_order_relaxed);
 				}
 			}
@@ -1579,42 +1642,30 @@ class ConcurrentQueue
 			}
 		}
 		
-		inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE_ - 1)); }
-		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE_ - 1)); }
+		inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
+		inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }
 		
 	private:
-		// IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of
-		// addresses returned by malloc, that alignment will be preserved. Apparently clang actually
-		// generates code that uses this assumption for AVX instructions in some cases. Ideally, we
-		// should also align Block to the alignment of T in case it's higher than malloc's 16-byte
-		// alignment, but this is hard to do in a cross-platform way. Assert for this case:
-		static_assert(std::alignment_of<T>::value <= std::alignment_of<details::max_align_t>::value, "The queue does not support super-aligned types at this time");
-		// Additionally, we need the alignment of Block itself to be a multiple of max_align_t since
-		// otherwise the appropriate padding will not be added at the end of Block in order to make
-		// arrays of Blocks all be properly aligned (not just the first one). We use a union to force
-		// this.
-		union {
-			char elements[sizeof(T) * BLOCK_SIZE_];
-			details::max_align_t dummy;
-		};
+		static_assert(std::alignment_of<T>::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time");
+		MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements;
 	public:
 		Block* next;
 		std::atomic<size_t> elementsCompletelyDequeued;
-		std::atomic<bool> emptyFlags[BLOCK_SIZE_ <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE_ : 1];
+		std::atomic<bool> emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1];
 	public:
 		std::atomic<std::uint32_t> freeListRefs;
 		std::atomic<Block*> freeListNext;
 		std::atomic<bool> shouldBeOnFreeList;
 		bool dynamicallyAllocated;		// Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'
 		
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 		void* owner;
 #endif
 	};
-	static_assert(std::alignment_of<Block>::value >= std::alignment_of<details::max_align_t>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
+	static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");
 
 
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 public:
 	struct MemStats;
 private:
@@ -1637,7 +1688,7 @@ class ConcurrentQueue
 		{
 		}
 		
-		virtual ~ProducerBase() { };
+		virtual ~ProducerBase() { }
 		
 		template<typename U>
 		inline bool dequeue(U& element)
@@ -1685,7 +1736,7 @@ class ConcurrentQueue
 		ConcurrentQueue* parent;
 		
 	protected:
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 		friend struct MemStats;
 #endif
 	};
@@ -1697,8 +1748,8 @@ class ConcurrentQueue
 		
 	struct ExplicitProducer : public ProducerBase
 	{
-		explicit ExplicitProducer(ConcurrentQueue* parent) :
-			ProducerBase(parent, true),
+		explicit ExplicitProducer(ConcurrentQueue* parent_) :
+			ProducerBase(parent_, true),
 			blockIndex(nullptr),
 			pr_blockIndexSlotsUsed(0),
 			pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),
@@ -1706,7 +1757,7 @@ class ConcurrentQueue
 			pr_blockIndexEntries(nullptr),
 			pr_blockIndexRaw(nullptr)
 		{
-			size_t poolBasedIndexSize = details::ceil_to_pow_2(parent->initialBlockPoolSize) >> 1;
+			size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
 			if (poolBasedIndexSize > pr_blockIndexSize) {
 				pr_blockIndexSize = poolBasedIndexSize;
 			}
@@ -1722,11 +1773,11 @@ class ConcurrentQueue
 			if (this->tailBlock != nullptr) {		// Note this means there must be a block index too
 				// First find the block that's partially dequeued, if any
 				Block* halfDequeuedBlock = nullptr;
-				if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE_ - 1)) != 0) {
+				if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
 					// The head's not on a block boundary, meaning a block somewhere is partially dequeued
 					// (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary)
 					size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1);
-					while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE_, this->headIndex.load(std::memory_order_relaxed))) {
+					while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) {
 						i = (i + 1) & (pr_blockIndexSize - 1);
 					}
 					assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed)));
@@ -1743,12 +1794,12 @@ class ConcurrentQueue
 					
 					size_t i = 0;	// Offset into block
 					if (block == halfDequeuedBlock) {
-						i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE_ - 1));
+						i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
 					}
 					
 					// Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index
-					auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE_ - 1)) == 0 ? BLOCK_SIZE_ : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE_ - 1));
-					while (i != BLOCK_SIZE_ && (block != this->tailBlock || i != lastValidIndex)) {
+					auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));
+					while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) {
 						(*block)[i++]->~T();
 					}
 				} while (block != this->tailBlock);
@@ -1784,7 +1835,7 @@ class ConcurrentQueue
 		{
 			index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
 			index_t newTailIndex = 1 + currentTailIndex;
-			if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) == 0) {
+			if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
 				// We reached the end of a block, start a new one
 				auto startBlock = this->tailBlock;
 				auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
@@ -1805,8 +1856,8 @@ class ConcurrentQueue
 					// <= to it.
 					auto head = this->headIndex.load(std::memory_order_relaxed);
 					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
-					if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE_)
-						|| (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE_ < currentTailIndex - head))) {
+					if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE)
+						|| (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
 						// We can't enqueue in another block because there's not enough leeway -- the
 						// tail could surpass the head by the time the block fills up! (Or we'll exceed
 						// the size limit, if the second part of the condition was true.)
@@ -1818,7 +1869,10 @@ class ConcurrentQueue
 						// to allocate a new index. Note pr_blockIndexRaw can only be nullptr if
 						// the initial allocation failed in the constructor.
 						
-						if (allocMode == CannotAlloc || !new_block_index(pr_blockIndexSlotsUsed)) {
+						MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
+							return false;
+						}
+						else if (!new_block_index(pr_blockIndexSlotsUsed)) {
 							return false;
 						}
 					}
@@ -1828,7 +1882,7 @@ class ConcurrentQueue
 					if (newBlock == nullptr) {
 						return false;
 					}
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 					newBlock->owner = this;
 #endif
 					newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
@@ -1842,8 +1896,8 @@ class ConcurrentQueue
 					this->tailBlock = newBlock;
 					++pr_blockIndexSlotsUsed;
 				}
-				
-				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
 					// The constructor may throw. We want the element not to appear in the queue in
 					// that case (without corrupting the queue):
 					MOODYCAMEL_TRY {
@@ -1869,7 +1923,7 @@ class ConcurrentQueue
 				blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
 				pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
 				
-				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
 					this->tailIndex.store(newTailIndex, std::memory_order_release);
 					return true;
 				}
@@ -1943,8 +1997,8 @@ class ConcurrentQueue
 					// When an index wraps, we need to preserve the sign of the offset when dividing it by the
 					// block size (in order to get a correct signed block count offset in all cases):
 					auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
-					auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE_ - 1);
-					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE_);
+					auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
+					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE);
 					auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;
 					
 					// Dequeue
@@ -1962,12 +2016,12 @@ class ConcurrentQueue
 								block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
 							}
 						} guard = { block, index };
-						
-						element = std::move(el);
+
+						element = std::move(el); // NOLINT
 					}
 					else {
-						element = std::move(el);
-						el.~T();
+						element = std::move(el); // NOLINT
+						el.~T(); // NOLINT
 						block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
 					}
 					
@@ -1983,7 +2037,7 @@ class ConcurrentQueue
 		}
 		
 		template<AllocationMode allocMode, typename It>
-		bool enqueue_bulk(It itemFirst, size_t count)
+		bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count)
 		{
 			// First, we need to make sure we have enough room to enqueue all of the elements;
 			// this means pre-allocating blocks and putting them in the block index (but only if
@@ -1996,13 +2050,13 @@ class ConcurrentQueue
 			Block* firstAllocatedBlock = nullptr;
 			
 			// Figure out how many blocks we'll need to allocate, and do so
-			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE_ - 1));
-			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE_ - 1);
+			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
+			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
 			if (blockBaseDiff > 0) {
 				// Allocate as many blocks as possible from ahead
 				while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {
-					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE_);
-					currentTailIndex += static_cast<index_t>(BLOCK_SIZE_);
+					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
+					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
 					
 					this->tailBlock = this->tailBlock->next;
 					firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;
@@ -2015,14 +2069,21 @@ class ConcurrentQueue
 				
 				// Now allocate as many blocks as necessary from the block pool
 				while (blockBaseDiff > 0) {
-					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE_);
-					currentTailIndex += static_cast<index_t>(BLOCK_SIZE_);
+					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
+					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
 					
 					auto head = this->headIndex.load(std::memory_order_relaxed);
 					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
-					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE_) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE_ < currentTailIndex - head));
+					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
 					if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
-						if (allocMode == CannotAlloc || full || !new_block_index(originalBlockIndexSlotsUsed)) {
+						MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
+							// Failed to allocate, undo changes (but keep injected blocks)
+							pr_blockIndexFront = originalBlockIndexFront;
+							pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
+							this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;
+							return false;
+						}
+						else if (full || !new_block_index(originalBlockIndexSlotsUsed)) {
 							// Failed to allocate, undo changes (but keep injected blocks)
 							pr_blockIndexFront = originalBlockIndexFront;
 							pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
@@ -2045,7 +2106,7 @@ class ConcurrentQueue
 						return false;
 					}
 					
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 					newBlock->owner = this;
 #endif
 					newBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();
@@ -2078,7 +2139,7 @@ class ConcurrentQueue
 					block = block->next;
 				}
 				
-				if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
 					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
 				}
 			}
@@ -2088,16 +2149,16 @@ class ConcurrentQueue
 			currentTailIndex = startTailIndex;
 			auto endBlock = this->tailBlock;
 			this->tailBlock = startBlock;
-			assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
-			if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) == 0 && firstAllocatedBlock != nullptr) {
+			assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
+			if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
 				this->tailBlock = firstAllocatedBlock;
 			}
 			while (true) {
-				auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) + static_cast<index_t>(BLOCK_SIZE_);
+				index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 				if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
 					stopIndex = newTailIndex;
 				}
-				if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
 					while (currentTailIndex != stopIndex) {
 						new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
 					}
@@ -2112,7 +2173,7 @@ class ConcurrentQueue
 							// may only define a (noexcept) move constructor, and so calls to the
 							// cctor will not compile, even if they are in an if branch that will never
 							// be executed
-							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
+							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
 							++currentTailIndex;
 							++itemFirst;
 						}
@@ -2130,12 +2191,12 @@ class ConcurrentQueue
 						
 						if (!details::is_trivially_destructible<T>::value) {
 							auto block = startBlock;
-							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) == 0) {
+							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
 								block = firstAllocatedBlock;
 							}
 							currentTailIndex = startTailIndex;
 							while (true) {
-								stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) + static_cast<index_t>(BLOCK_SIZE_);
+								stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 								if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
 									stopIndex = constructedStopIndex;
 								}
@@ -2159,8 +2220,9 @@ class ConcurrentQueue
 				this->tailBlock = this->tailBlock->next;
 			}
 			
-			if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) {
-				blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
+			MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
+				if (firstAllocatedBlock != nullptr)
+					blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);
 			}
 			
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
@@ -2177,7 +2239,7 @@ class ConcurrentQueue
 				desiredCount = desiredCount < max ? desiredCount : max;
 				std::atomic_thread_fence(std::memory_order_acquire);
 				
-				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);;
+				auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);
 				
 				tail = this->tailIndex.load(std::memory_order_acquire);
 				auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));
@@ -2196,15 +2258,15 @@ class ConcurrentQueue
 					auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);
 					
 					auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
-					auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE_ - 1);
-					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE_);
+					auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
+					auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);
 					auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);
 					
 					// Iterate the blocks and dequeue
 					auto index = firstIndex;
 					do {
 						auto firstIndexInBlock = index;
-						auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) + static_cast<index_t>(BLOCK_SIZE_);
+						index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 						auto block = localBlockIndex->entries[indexIndex].block;
 						if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
@@ -2238,7 +2300,7 @@ class ConcurrentQueue
 									indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
 									
 									firstIndexInBlock = index;
-									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) + static_cast<index_t>(BLOCK_SIZE_);
+									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 								} while (index != firstIndex + actualCount);
 								
@@ -2331,7 +2393,7 @@ class ConcurrentQueue
 	private:
 #endif
 		
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 		friend struct MemStats;
 #endif
 	};
@@ -2343,8 +2405,8 @@ class ConcurrentQueue
 	
 	struct ImplicitProducer : public ProducerBase
 	{			
-		ImplicitProducer(ConcurrentQueue* parent) :
-			ProducerBase(parent, false),
+		ImplicitProducer(ConcurrentQueue* parent_) :
+			ProducerBase(parent_, false),
 			nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
 			blockIndex(nullptr)
 		{
@@ -2372,7 +2434,7 @@ class ConcurrentQueue
 			assert(index == tail || details::circular_less_than(index, tail));
 			bool forceFreeLastBlock = index != tail;		// If we enter the loop, then the last (tail) block will not be freed
 			while (index != tail) {
-				if ((index & static_cast<index_t>(BLOCK_SIZE_ - 1)) == 0 || block == nullptr) {
+				if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) {
 					if (block != nullptr) {
 						// Free the old block
 						this->parent->add_block_to_free_list(block);
@@ -2387,7 +2449,7 @@ class ConcurrentQueue
 			// Even if the queue is empty, there's still one block that's not on the free list
 			// (unless the head index reached the end of it, in which case the tail will be poised
 			// to create a new block).
-			if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE_ - 1)) != 0)) {
+			if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
 				this->parent->add_block_to_free_list(this->tailBlock);
 			}
 			
@@ -2411,14 +2473,14 @@ class ConcurrentQueue
 		{
 			index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);
 			index_t newTailIndex = 1 + currentTailIndex;
-			if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) == 0) {
+			if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
 				// We reached the end of a block, start a new one
 				auto head = this->headIndex.load(std::memory_order_relaxed);
 				assert(!details::circular_less_than<index_t>(currentTailIndex, head));
-				if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE_) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE_ < currentTailIndex - head))) {
+				if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
 					return false;
 				}
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 				debug::DebugLock lock(mutex);
 #endif
 				// Find out where we'll be inserting this block in the block index
@@ -2434,12 +2496,12 @@ class ConcurrentQueue
 					idxEntry->value.store(nullptr, std::memory_order_relaxed);
 					return false;
 				}
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 				newBlock->owner = this;
 #endif
 				newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
-				
-				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
+
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
 					// May throw, try to insert now before we publish the fact that we have this new block
 					MOODYCAMEL_TRY {
 						new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
@@ -2457,7 +2519,7 @@ class ConcurrentQueue
 				
 				this->tailBlock = newBlock;
 				
-				if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
+				MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast<T*>(nullptr)) T(std::forward<U>(element)))) {
 					this->tailIndex.store(newTailIndex, std::memory_order_release);
 					return true;
 				}
@@ -2492,7 +2554,7 @@ class ConcurrentQueue
 					auto& el = *((*block)[index]);
 					
 					if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 						// Note: Acquiring the mutex with every dequeue instead of only when a block
 						// is released is very sub-optimal, but it is, after all, purely debug code.
 						debug::DebugLock lock(producer->mutex);
@@ -2512,16 +2574,16 @@ class ConcurrentQueue
 								}
 							}
 						} guard = { block, index, entry, this->parent };
-						
-						element = std::move(el);
+
+						element = std::move(el); // NOLINT
 					}
 					else {
-						element = std::move(el);
-						el.~T();
-					
+						element = std::move(el); // NOLINT
+						el.~T(); // NOLINT
+
 						if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {
 							{
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 								debug::DebugLock lock(mutex);
 #endif
 								// Add the block back into the global free pool (and remove from block index)
@@ -2541,6 +2603,10 @@ class ConcurrentQueue
 			return false;
 		}
 		
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4706)  // assignment within conditional expression
+#endif
 		template<AllocationMode allocMode, typename It>
 		bool enqueue_bulk(It itemFirst, size_t count)
 		{
@@ -2559,15 +2625,15 @@ class ConcurrentQueue
 			auto endBlock = this->tailBlock;
 			
 			// Figure out how many blocks we'll need to allocate, and do so
-			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE_ - 1));
-			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE_ - 1);
+			size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
+			index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
 			if (blockBaseDiff > 0) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 				debug::DebugLock lock(mutex);
 #endif
 				do {
-					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE_);
-					currentTailIndex += static_cast<index_t>(BLOCK_SIZE_);
+					blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
+					currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
 					
 					// Find out where we'll be inserting this block in the block index
 					BlockIndexEntry* idxEntry = nullptr;  // initialization here unnecessary but compiler can't always tell
@@ -2575,7 +2641,8 @@ class ConcurrentQueue
 					bool indexInserted = false;
 					auto head = this->headIndex.load(std::memory_order_relaxed);
 					assert(!details::circular_less_than<index_t>(currentTailIndex, head));
-					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE_) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE_ < currentTailIndex - head));
+					bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
+
 					if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == nullptr) {
 						// Index allocation or block allocation failed; revert any other allocations
 						// and index insertions done so far for this operation
@@ -2583,9 +2650,9 @@ class ConcurrentQueue
 							rewind_block_index_tail();
 							idxEntry->value.store(nullptr, std::memory_order_relaxed);
 						}
-						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE_ - 1);
+						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
 						for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
-							currentTailIndex += static_cast<index_t>(BLOCK_SIZE_);
+							currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
 							idxEntry = get_block_index_entry_for_index(currentTailIndex);
 							idxEntry->value.store(nullptr, std::memory_order_relaxed);
 							rewind_block_index_tail();
@@ -2596,7 +2663,7 @@ class ConcurrentQueue
 						return false;
 					}
 					
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 					newBlock->owner = this;
 #endif
 					newBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();
@@ -2607,7 +2674,7 @@ class ConcurrentQueue
 					
 					// Store the chain of blocks so that we can undo if later allocations fail,
 					// and so that we can find the blocks when we do the actual enqueueing
-					if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) != 0 || firstAllocatedBlock != nullptr) {
+					if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) {
 						assert(this->tailBlock != nullptr);
 						this->tailBlock->next = newBlock;
 					}
@@ -2621,16 +2688,16 @@ class ConcurrentQueue
 			index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
 			currentTailIndex = startTailIndex;
 			this->tailBlock = startBlock;
-			assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
-			if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) == 0 && firstAllocatedBlock != nullptr) {
+			assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);
+			if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {
 				this->tailBlock = firstAllocatedBlock;
 			}
 			while (true) {
-				auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) + static_cast<index_t>(BLOCK_SIZE_);
+				index_t stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 				if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
 					stopIndex = newTailIndex;
 				}
-				if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {
+				MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))) {
 					while (currentTailIndex != stopIndex) {
 						new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
 					}
@@ -2638,7 +2705,7 @@ class ConcurrentQueue
 				else {
 					MOODYCAMEL_TRY {
 						while (currentTailIndex != stopIndex) {
-							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
+							new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast<T*>(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));
 							++currentTailIndex;
 							++itemFirst;
 						}
@@ -2649,12 +2716,12 @@ class ConcurrentQueue
 						
 						if (!details::is_trivially_destructible<T>::value) {
 							auto block = startBlock;
-							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE_ - 1)) == 0) {
+							if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
 								block = firstAllocatedBlock;
 							}
 							currentTailIndex = startTailIndex;
 							while (true) {
-								stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) + static_cast<index_t>(BLOCK_SIZE_);
+								stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 								if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {
 									stopIndex = constructedStopIndex;
 								}
@@ -2668,9 +2735,9 @@ class ConcurrentQueue
 							}
 						}
 						
-						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE_ - 1);
+						currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
 						for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {
-							currentTailIndex += static_cast<index_t>(BLOCK_SIZE_);
+							currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
 							auto idxEntry = get_block_index_entry_for_index(currentTailIndex);
 							idxEntry->value.store(nullptr, std::memory_order_relaxed);
 							rewind_block_index_tail();
@@ -2690,6 +2757,9 @@ class ConcurrentQueue
 			this->tailIndex.store(newTailIndex, std::memory_order_release);
 			return true;
 		}
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
 		
 		template<typename It>
 		size_t dequeue_bulk(It& itemFirst, size_t max)
@@ -2721,7 +2791,7 @@ class ConcurrentQueue
 					auto indexIndex = get_block_index_index_for_index(index, localBlockIndex);
 					do {
 						auto blockStartIndex = index;
-						auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) + static_cast<index_t>(BLOCK_SIZE_);
+						index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 						endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 						
 						auto entry = localBlockIndex->index[indexIndex];
@@ -2753,7 +2823,7 @@ class ConcurrentQueue
 									}
 									
 									if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 										debug::DebugLock lock(mutex);
 #endif
 										entry->value.store(nullptr, std::memory_order_relaxed);
@@ -2762,7 +2832,7 @@ class ConcurrentQueue
 									indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
 									
 									blockStartIndex = index;
-									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE_ - 1)) + static_cast<index_t>(BLOCK_SIZE_);
+									endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
 									endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
 								} while (index != firstIndex + actualCount);
 								
@@ -2771,7 +2841,7 @@ class ConcurrentQueue
 						}
 						if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {
 							{
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 								debug::DebugLock lock(mutex);
 #endif
 								// Note that the set_many_empty above did a release, meaning that anybody who acquires the block
@@ -2819,7 +2889,7 @@ class ConcurrentQueue
 			if (localBlockIndex == nullptr) {
 				return false;  // this can happen if new_block_index failed in the constructor
 			}
-			auto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
+			size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);
 			idxEntry = localBlockIndex->index[newTail];
 			if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
 				idxEntry->value.load(std::memory_order_relaxed) == nullptr) {
@@ -2830,7 +2900,10 @@ class ConcurrentQueue
 			}
 			
 			// No room in the old block index, try to allocate another one!
-			if (allocMode == CannotAlloc || !new_block_index()) {
+			MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) {
+				return false;
+			}
+			else if (!new_block_index()) {
 				return false;
 			}
 			localBlockIndex = blockIndex.load(std::memory_order_relaxed);
@@ -2857,17 +2930,17 @@ class ConcurrentQueue
 		
 		inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const
 		{
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 			debug::DebugLock lock(mutex);
 #endif
-			index &= ~static_cast<index_t>(BLOCK_SIZE_ - 1);
+			index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
 			localBlockIndex = blockIndex.load(std::memory_order_acquire);
 			auto tail = localBlockIndex->tail.load(std::memory_order_acquire);
 			auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);
 			assert(tailBase != INVALID_BLOCK_BASE);
 			// Note: Must use division instead of shift because the index may wrap around, causing a negative
 			// offset, whose negativity we want to preserve
-			auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / BLOCK_SIZE_);
+			auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / BLOCK_SIZE);
 			size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);
 			assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr);
 			return idx;
@@ -2933,10 +3006,10 @@ class ConcurrentQueue
 	private:
 #endif
 
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
 		mutable debug::DebugMutex mutex;
 #endif
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 		friend struct MemStats;
 #endif
 	};
@@ -2976,7 +3049,7 @@ class ConcurrentQueue
 	
 	inline void add_block_to_free_list(Block* block)
 	{
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 		block->owner = nullptr;
 #endif
 		freeList.add(block);
@@ -3010,15 +3083,16 @@ class ConcurrentQueue
 			return block;
 		}
 		
-		if (canAlloc == CanAlloc) {
+		MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) {
 			return create<Block>();
 		}
-		
-		return nullptr;
+		else {
+			return nullptr;
+		}
 	}
 	
 
-#if MCDBGQ_TRACKMEM
+#ifdef MCDBGQ_TRACKMEM
 	public:
 		struct MemStats {
 			size_t allocatedBlocks;
@@ -3073,7 +3147,7 @@ class ConcurrentQueue
 								stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*);
 							}
 						}
-						for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE_) {
+						for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE) {
 							//auto block = prod->get_block_index_entry_for_index(head);
 							++stats.usedBlocks;
 						}
@@ -3136,7 +3210,7 @@ class ConcurrentQueue
 	
 	ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled)
 	{
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
 		// Try to re-use one first
@@ -3243,50 +3317,56 @@ class ConcurrentQueue
 	
 	inline void populate_initial_implicit_producer_hash()
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
-		
-		implicitProducerHashCount.store(0, std::memory_order_relaxed);
-		auto hash = &initialImplicitProducerHash;
-		hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
-		hash->entries = &initialImplicitProducerHashEntries[0];
-		for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
-			initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
+			return;
+		}
+		else {
+			implicitProducerHashCount.store(0, std::memory_order_relaxed);
+			auto hash = &initialImplicitProducerHash;
+			hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
+			hash->entries = &initialImplicitProducerHashEntries[0];
+			for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
+				initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
+			}
+			hash->prev = nullptr;
+			implicitProducerHash.store(hash, std::memory_order_relaxed);
 		}
-		hash->prev = nullptr;
-		implicitProducerHash.store(hash, std::memory_order_relaxed);
 	}
 	
 	void swap_implicit_producer_hashes(ConcurrentQueue& other)
 	{
-		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;
-		
-		// Swap (assumes our implicit producer hash is initialized)
-		initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
-		initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
-		other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];
-		
-		details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
-		
-		details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
-		if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
-			implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
+		MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
+			return;
 		}
 		else {
-			ImplicitProducerHash* hash;
-			for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) {
-				continue;
+			// Swap (assumes our implicit producer hash is initialized)
+			initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
+			initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
+			other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];
+
+			details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);
+
+			details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
+			if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
+				implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
 			}
-			hash->prev = &initialImplicitProducerHash;
-		}
-		if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
-			other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
-		}
-		else {
-			ImplicitProducerHash* hash;
-			for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
-				continue;
+			else {
+				ImplicitProducerHash* hash;
+				for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) {
+					continue;
+				}
+				hash->prev = &initialImplicitProducerHash;
+			}
+			if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
+				other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
+			}
+			else {
+				ImplicitProducerHash* hash;
+				for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
+					continue;
+				}
+				hash->prev = &other.initialImplicitProducerHash;
 			}
-			hash->prev = &other.initialImplicitProducerHash;
 		}
 	}
 	
@@ -3303,7 +3383,7 @@ class ConcurrentQueue
 		
 		// Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table
 		
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
 		
@@ -3311,6 +3391,7 @@ class ConcurrentQueue
 		auto hashedId = details::hash_thread_id(id);
 		
 		auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
+		assert(mainHash != nullptr);  // silence clang-tidy and MSVC warnings (hash cannot be null)
 		for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
 			// Look for the id in this hash
 			auto index = hashedId;
@@ -3357,6 +3438,7 @@ class ConcurrentQueue
 		// Insert!
 		auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
 		while (true) {
+			// NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
 			if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
 				// We've acquired the resize lock, try to allocate a bigger hash table.
 				// Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
@@ -3377,7 +3459,7 @@ class ConcurrentQueue
 					}
 					
 					auto newHash = new (raw) ImplicitProducerHash;
-					newHash->capacity = newCapacity;
+					newHash->capacity = static_cast<size_t>(newCapacity);
 					newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
 					for (size_t i = 0; i != newCapacity; ++i) {
 						new (newHash->entries + i) ImplicitProducerKVP;
@@ -3448,7 +3530,7 @@ class ConcurrentQueue
 		details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener);
 		
 		// Remove from hash
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 		debug::DebugLock lock(implicitProdMutex);
 #endif
 		auto hash = implicitProducerHash.load(std::memory_order_acquire);
@@ -3487,55 +3569,76 @@ class ConcurrentQueue
 	//////////////////////////////////
 	// Utility functions
 	//////////////////////////////////
-	
+
+	template<typename TAlign>  // aligned_malloc: allocate `size` bytes through Traits::malloc at an address aligned for TAlign
+	static inline void* aligned_malloc(size_t size)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
+			return (Traits::malloc)(size);  // alignment does not exceed max_align_t's: hand back Traits::malloc's result unchanged
+		else {
+			size_t alignment = std::alignment_of<TAlign>::value;
+			void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*));  // room for worst-case padding plus one pointer slot
+			if (!raw)
+				return nullptr;  // underlying allocation failed
+			char* ptr = details::align_for<TAlign>(reinterpret_cast<char*>(raw) + sizeof(void*));  // skip the pointer slot, then align
+			*(reinterpret_cast<void**>(ptr) - 1) = raw;  // remember the original pointer in the slot just below the returned address
+			return ptr;
+		}
+	}
+
+	template<typename TAlign>  // aligned_free: release a pointer obtained from aligned_malloc<TAlign>
+	static inline void aligned_free(void* ptr)
+	{
+		MOODYCAMEL_CONSTEXPR_IF (std::alignment_of<TAlign>::value <= std::alignment_of<details::max_align_t>::value)
+			return (Traits::free)(ptr);  // same condition as aligned_malloc's pass-through branch: free the pointer directly
+		else
+			(Traits::free)(ptr ? *(reinterpret_cast<void**>(ptr) - 1) : nullptr);  // read back the original pointer stored one slot below ptr; a null ptr is forwarded as null
+	}
+
 	template<typename U>
 	static inline U* create_array(size_t count)
 	{
 		assert(count > 0);
-		auto p = static_cast<U*>((Traits::malloc)(sizeof(U) * count));
-		if (p == nullptr) {
+		U* p = static_cast<U*>(aligned_malloc<U>(sizeof(U) * count));
+		if (p == nullptr)
 			return nullptr;
-		}
-		
-		for (size_t i = 0; i != count; ++i) {
+
+		for (size_t i = 0; i != count; ++i)
 			new (p + i) U();
-		}
 		return p;
 	}
-	
+
 	template<typename U>
 	static inline void destroy_array(U* p, size_t count)
 	{
 		if (p != nullptr) {
 			assert(count > 0);
-			for (size_t i = count; i != 0; ) {
+			for (size_t i = count; i != 0; )
 				(p + --i)->~U();
-			}
-			(Traits::free)(p);
 		}
+		aligned_free<U>(p);
 	}
-	
+
 	template<typename U>
 	static inline U* create()
 	{
-		auto p = (Traits::malloc)(sizeof(U));
+		void* p = aligned_malloc<U>(sizeof(U));
 		return p != nullptr ? new (p) U : nullptr;
 	}
-	
+
 	template<typename U, typename A1>
 	static inline U* create(A1&& a1)
 	{
-		auto p = (Traits::malloc)(sizeof(U));
+		void* p = aligned_malloc<U>(sizeof(U));
 		return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
 	}
-	
+
 	template<typename U>
 	static inline void destroy(U* p)
 	{
-		if (p != nullptr) {
+		if (p != nullptr)
 			p->~U();
-		}
-		(Traits::free)(p);
+		aligned_free<U>(p);
 	}
 
 private:
@@ -3546,7 +3649,7 @@ class ConcurrentQueue
 	Block* initialBlockPool;
 	size_t initialBlockPoolSize;
 	
-#if !MCDBGQ_USEDEBUGFREELIST
+#ifndef MCDBGQ_USEDEBUGFREELIST
 	FreeList<Block> freeList;
 #else
 	debug::DebugFreeList<Block> freeList;
@@ -3561,7 +3664,7 @@ class ConcurrentQueue
 	std::atomic<std::uint32_t> nextExplicitConsumerId;
 	std::atomic<std::uint32_t> globalExplicitConsumerOffset;
 	
-#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
 	debug::DebugMutex implicitProdMutex;
 #endif
 	
@@ -3595,7 +3698,7 @@ ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
 	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
 {
 	initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
-	lastKnownGlobalOffset = -1;
+	lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
 }
 
 template<typename T, typename Traits>
@@ -3603,7 +3706,7 @@ ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
 	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
 {
 	initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
-	lastKnownGlobalOffset = -1;
+	lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
 }
 
 template<typename T, typename Traits>
@@ -3630,6 +3733,10 @@ inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, ty
 
 }
 
-#if defined(__GNUC__)
+#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
+#pragma warning(pop)
+#endif
+
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
 #pragma GCC diagnostic pop
 #endif
diff --git a/src/modules/common/csv.hpp b/src/modules/common/csv.hpp
index 9cebfc2b0..ae4267a2c 100644
--- a/src/modules/common/csv.hpp
+++ b/src/modules/common/csv.hpp
@@ -1831,7 +1831,7 @@ using shared_ummap_sink = basic_shared_mmap_sink<unsigned char>;
 # ifndef WIN32_LEAN_AND_MEAN
 #  define WIN32_LEAN_AND_MEAN
 # endif
-# include <Windows.h>
+# include <windows.h>
 # undef max
 # undef min
 #elif defined(__linux__)
diff --git a/src/modules/common/lightweightsemaphore.h b/src/modules/common/lightweightsemaphore.h
new file mode 100644
index 000000000..b0f24e1cd
--- /dev/null
+++ b/src/modules/common/lightweightsemaphore.h
@@ -0,0 +1,411 @@
+// Provides an efficient implementation of a semaphore (LightweightSemaphore).
+// This is an extension of Jeff Preshing's semaphore implementation (licensed
+// under the terms of its separate zlib license) that has been adapted and
+// extended by Cameron Desrochers.
+
+#pragma once
+
+#include <cstddef> // For std::size_t
+#include <atomic>
+#include <type_traits> // For std::make_signed<T>
+
+#if defined(_WIN32)
+// Avoid including windows.h in a header; we only need a handful of
+// items, so we'll redeclare them here (this is relatively safe since
+// the API generally has to remain stable between Windows versions).
+// I know this is an ugly hack but it still beats polluting the global
+// namespace with thousands of generic names or adding a .cpp for nothing.
+extern "C" {
+	struct _SECURITY_ATTRIBUTES;
+	__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
+	__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
+	__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
+	__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
+}
+#elif defined(__MACH__)
+#include <mach/mach.h>
+#elif defined(__unix__)
+#include <semaphore.h>
+#endif
+
+namespace moodycamel
+{
+namespace details
+{
+
+// The Semaphore classes below are an adaptation of Jeff Preshing's
+// portable + lightweight semaphore implementations, originally from
+// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
+// LICENSE:
+// Copyright (c) 2015 Jeff Preshing
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+//	claim that you wrote the original software. If you use this software
+//	in a product, an acknowledgement in the product documentation would be
+//	appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//	misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+#if defined(_WIN32)
+class Semaphore  // Win32 variant: thin wrapper around a semaphore handle obtained from CreateSemaphoreW
+{
+private:
+	void* m_hSema;  // handle returned by CreateSemaphoreW; closed in the destructor
+	
+	Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;  // copying is disallowed (macro is defined outside this header; presumably `= delete`)
+	Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+
+public:
+	Semaphore(int initialCount = 0)  // creates an unnamed semaphore whose count starts at initialCount
+	{
+		assert(initialCount >= 0);
+		const long maxLong = 0x7fffffff;  // passed as lMaximumCount
+		m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
+		assert(m_hSema);  // a failed creation is only detected when assertions are enabled
+	}
+
+	~Semaphore()
+	{
+		CloseHandle(m_hSema);
+	}
+
+	bool wait()  // waits without a timeout; true when WaitForSingleObject returns 0
+	{
+		const unsigned long infinite = 0xffffffff;  // same value as the Win32 INFINITE constant
+		return WaitForSingleObject(m_hSema, infinite) == 0;
+	}
+	
+	bool try_wait()  // polls with a zero timeout; true only if the wait returned 0
+	{
+		return WaitForSingleObject(m_hSema, 0) == 0;
+	}
+	
+	bool timed_wait(std::uint64_t usecs)  // waits for at most usecs microseconds, at millisecond granularity
+	{
+		return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0;  // integer division drops sub-millisecond remainders; NOTE(review): the cast can also truncate very large timeouts
+	}
+
+	void signal(int count = 1)  // raises the semaphore count by `count`
+	{
+		while (!ReleaseSemaphore(m_hSema, count, nullptr));  // retries until ReleaseSemaphore reports success
+	}
+};
+#elif defined(__MACH__)
+//---------------------------------------------------------
+// Semaphore (Apple iOS and OSX)
+// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
+//---------------------------------------------------------
+class Semaphore  // Mach variant: wraps a semaphore_t created in the current task
+{
+private:
+	semaphore_t m_sema;  // created in the constructor, destroyed in the destructor
+
+	Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;  // copying is disallowed (macro is defined outside this header; presumably `= delete`)
+	Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+
+public:
+	Semaphore(int initialCount = 0)  // creates the semaphore with FIFO policy and the given starting count
+	{
+		assert(initialCount >= 0);
+		kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
+		assert(rc == KERN_SUCCESS);
+		(void)rc;  // keeps rc referenced when the assert compiles to nothing
+	}
+
+	~Semaphore()
+	{
+		semaphore_destroy(mach_task_self(), m_sema);
+	}
+
+	bool wait()  // true when semaphore_wait reports KERN_SUCCESS
+	{
+		return semaphore_wait(m_sema) == KERN_SUCCESS;
+	}
+	
+	bool try_wait()  // implemented as a timed wait with a zero timeout
+	{
+		return timed_wait(0);
+	}
+	
+	bool timed_wait(std::uint64_t timeout_usecs)  // true when semaphore_timedwait reports KERN_SUCCESS
+	{
+		mach_timespec_t ts;
+		ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);  // whole seconds
+		ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000);  // leftover microseconds converted to nanoseconds
+
+		// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
+		kern_return_t rc = semaphore_timedwait(m_sema, ts);
+		return rc == KERN_SUCCESS;
+	}
+
+	void signal()  // signals once
+	{
+		while (semaphore_signal(m_sema) != KERN_SUCCESS);  // retries until the signal succeeds
+	}
+
+	void signal(int count)  // signals `count` times; does nothing when count <= 0
+	{
+		while (count-- > 0)
+		{
+			while (semaphore_signal(m_sema) != KERN_SUCCESS);  // retries until this signal succeeds
+		}
+	}
+};
+#elif defined(__unix__)
+//---------------------------------------------------------
+// Semaphore (POSIX, Linux)
+//---------------------------------------------------------
+class Semaphore  // POSIX variant: wraps an unnamed sem_t
+{
+private:
+	sem_t m_sema;  // initialised in the constructor, destroyed in the destructor
+
+	Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;  // copying is disallowed (macro is defined outside this header; presumably `= delete`)
+	Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
+
+public:
+	Semaphore(int initialCount = 0)  // initialises the semaphore with the given starting count
+	{
+		assert(initialCount >= 0);
+		int rc = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));  // pshared argument is 0
+		assert(rc == 0);
+		(void)rc;  // keeps rc referenced when the assert compiles to nothing
+	}
+
+	~Semaphore()
+	{
+		sem_destroy(&m_sema);
+	}
+
+	bool wait()  // blocks in sem_wait; true when it finally returns 0
+	{
+		// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
+		int rc;
+		do {
+			rc = sem_wait(&m_sema);
+		} while (rc == -1 && errno == EINTR);  // retry when the call fails with EINTR
+		return rc == 0;
+	}
+
+	bool try_wait()  // non-blocking attempt via sem_trywait; true when it returns 0
+	{
+		int rc;
+		do {
+			rc = sem_trywait(&m_sema);
+		} while (rc == -1 && errno == EINTR);  // retry when the call fails with EINTR
+		return rc == 0;
+	}
+
+	bool timed_wait(std::uint64_t usecs)  // waits until "now + usecs microseconds"; true when sem_timedwait returns 0
+	{
+		struct timespec ts;
+		const int usecs_in_1_sec = 1000000;
+		const int nsecs_in_1_sec = 1000000000;
+		clock_gettime(CLOCK_REALTIME, &ts);  // current CLOCK_REALTIME time is the base of the deadline built below
+		ts.tv_sec += (time_t)(usecs / usecs_in_1_sec);  // whole seconds of the timeout
+		ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000;  // leftover microseconds converted to nanoseconds
+		// sem_timedwait bombs if you have more than 1e9 in tv_nsec
+		// so we have to clean things up before passing it in
+		if (ts.tv_nsec >= nsecs_in_1_sec) {
+			ts.tv_nsec -= nsecs_in_1_sec;  // carry one full second out of tv_nsec ...
+			++ts.tv_sec;  // ... into tv_sec
+		}
+
+		int rc;
+		do {
+			rc = sem_timedwait(&m_sema, &ts);
+		} while (rc == -1 && errno == EINTR);  // retry with the same deadline when the call fails with EINTR
+		return rc == 0;
+	}
+
+	void signal()  // posts once
+	{
+		while (sem_post(&m_sema) == -1);  // retries for as long as sem_post reports failure
+	}
+
+	void signal(int count)  // posts `count` times; does nothing when count <= 0
+	{
+		while (count-- > 0)
+		{
+			while (sem_post(&m_sema) == -1);  // retries for as long as sem_post reports failure
+		}
+	}
+};
+#else
+#error Unsupported platform! (No semaphore wrapper available)
+#endif
+
+}	// end namespace details
+
+
+//---------------------------------------------------------
+// LightweightSemaphore
+//---------------------------------------------------------
+class LightweightSemaphore	// Semaphore that first tries an atomic counter (with bounded spinning) and only then waits on details::Semaphore.
+{
+public:
+	typedef std::make_signed<std::size_t>::type ssize_t;
+
+private:
+	std::atomic<ssize_t> m_count;	// may drop below zero: waiters decrement it before waiting on m_sema, and signal() uses the negative part to decide how many to release
+	details::Semaphore m_sema;
+	int m_maxSpins;	// upper bound on spin iterations before falling back to m_sema
+
+	bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)	// timeout_usecs < 0: wait without timeout; 0: never wait on m_sema; > 0: timed wait. Returns true when one unit was acquired.
+	{
+		ssize_t oldCount;
+		int spin = m_maxSpins;
+		while (--spin >= 0)
+		{
+			oldCount = m_count.load(std::memory_order_relaxed);
+			if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
+				return true;
+			std::atomic_signal_fence(std::memory_order_acquire);	 // Prevent the compiler from collapsing the loop.
+		}
+		oldCount = m_count.fetch_sub(1, std::memory_order_acquire);	// register as a waiter
+		if (oldCount > 0)
+			return true;
+		if (timeout_usecs < 0)
+		{
+			if (m_sema.wait())
+				return true;
+		}
+		if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs))
+			return true;
+		// At this point, we've timed out waiting for the semaphore, but the
+		// count is still decremented indicating we may still be waiting on
+		// it. So we have to re-adjust the count, but only if the semaphore
+		// wasn't signaled enough times for us too since then. If it was, we
+		// need to release the semaphore too.
+		while (true)
+		{
+			oldCount = m_count.load(std::memory_order_acquire);
+			if (oldCount >= 0 && m_sema.try_wait())
+				return true;
+			if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed))
+				return false;
+		}
+	}
+
+	ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1)	// Like waitWithPartialSpinning() but may take up to max units; returns the number taken (0 when giving up).
+	{
+		assert(max > 0);
+		ssize_t oldCount;
+		int spin = m_maxSpins;
+		while (--spin >= 0)
+		{
+			oldCount = m_count.load(std::memory_order_relaxed);
+			if (oldCount > 0)
+			{
+				ssize_t newCount = oldCount > max ? oldCount - max : 0;	// take min(oldCount, max)
+				if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
+					return oldCount - newCount;
+			}
+			std::atomic_signal_fence(std::memory_order_acquire);
+		}
+		oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
+		if (oldCount <= 0)
+		{
+			if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs)))
+			{
+				while (true)	// same count re-adjustment as the tail of waitWithPartialSpinning()
+				{
+					oldCount = m_count.load(std::memory_order_acquire);
+					if (oldCount >= 0 && m_sema.try_wait())
+						break;
+					if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed))
+						return 0;
+				}
+			}
+		}
+		if (max > 1)
+			return 1 + tryWaitMany(max - 1);	// one unit is held; try to take the remainder without waiting
+		return 1;
+	}
+
+public:
+	LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins)	// both arguments are asserted to be >= 0
+	{
+		assert(initialCount >= 0);
+		assert(maxSpins >= 0);
+	}
+
+	bool tryWait()	// Takes one unit only if the count is currently positive; never touches m_sema.
+	{
+		ssize_t oldCount = m_count.load(std::memory_order_relaxed);
+		while (oldCount > 0)
+		{
+			if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
+				return true;
+		}
+		return false;
+	}
+
+	bool wait()	// Fast path via tryWait(), otherwise spin-then-wait with no timeout.
+	{
+		return tryWait() || waitWithPartialSpinning();
+	}
+
+	bool wait(std::int64_t timeout_usecs)	// As wait(), forwarding timeout_usecs to waitWithPartialSpinning().
+	{
+		return tryWait() || waitWithPartialSpinning(timeout_usecs);
+	}
+
+	// Acquires between 0 and (greedily) max, inclusive
+	ssize_t tryWaitMany(ssize_t max)
+	{
+		assert(max >= 0);
+		ssize_t oldCount = m_count.load(std::memory_order_relaxed);
+		while (oldCount > 0)
+		{
+			ssize_t newCount = oldCount > max ? oldCount - max : 0;
+			if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
+				return oldCount - newCount;
+		}
+		return 0;
+	}
+
+	// Acquires at least one, and (greedily) at most max
+	ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs)
+	{
+		assert(max >= 0);
+		ssize_t result = tryWaitMany(max);
+		if (result == 0 && max > 0)
+			result = waitManyWithPartialSpinning(max, timeout_usecs);
+		return result;
+	}
+	
+	ssize_t waitMany(ssize_t max)	// waitMany() with timeout -1; the result is asserted to be > 0.
+	{
+		ssize_t result = waitMany(max, -1);
+		assert(result > 0);
+		return result;
+	}
+
+	void signal(ssize_t count = 1)	// Adds count to m_count and signals m_sema min(-oldCount, count) times when that is positive.
+	{
+		assert(count >= 0);
+		ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
+		ssize_t toRelease = -oldCount < count ? -oldCount : count;
+		if (toRelease > 0)
+		{
+			m_sema.signal((int)toRelease);
+		}
+	}
+	
+	std::size_t availableApprox() const	// Relaxed snapshot of the count, with negative values reported as 0.
+	{
+		ssize_t count = m_count.load(std::memory_order_relaxed);
+		return count > 0 ? static_cast<std::size_t>(count) : 0;
+	}
+};
+
+}   // end namespace moodycamel
diff --git a/src/modules/sdorm/sql/libpq/LibpqDataSourceImpl.cpp b/src/modules/sdorm/sql/libpq/LibpqDataSourceImpl.cpp
index e7165bbdb..5af2d6324 100644
--- a/src/modules/sdorm/sql/libpq/LibpqDataSourceImpl.cpp
+++ b/src/modules/sdorm/sql/libpq/LibpqDataSourceImpl.cpp
@@ -106,6 +106,10 @@ bool LibpqDataSourceImpl::init() {
 	return true;
 }
 
+void LibpqDataSourceImpl::handle() {	// Runs the read task once. NOTE(review): assumes rdTsk is non-NULL whenever this is called — confirm for non-async data sources.
+	rdTsk->run();
+}
+
 void LibpqDataSourceImpl::beginAsync(LibpqAsyncReq* areq) {
 	if(isAsync) {
 #ifdef HAVE_LIBPQ
@@ -458,7 +462,7 @@ void PgReadTask::run() {
 			PQclear(res);
 			continue;
 		}
-		if(q->isPrepared) {
+		if(q->isPrepared && !q->prepared) {
 			if (PQresultStatus(res) != PGRES_COMMAND_OK) {
 				fprintf(stderr, "PREPARE failed: %s\n", PQerrorMessage(ths->conn));
 				if(ritem->fcb!=NULL) {
@@ -473,7 +477,6 @@ void PgReadTask::run() {
 				counter = -1;
 			} else {
 				ritem->cnt--;
-				q->isPrepared = false;
 			}
 			PQclear(res);
 		} else if(q->isSelect) {
@@ -662,38 +665,41 @@ void PgReadTask::submit(LibpqAsyncReq* nitem) {
 
 			int psize = (int)q->pvals.size();
 
-			if(q->isPrepared && ths->prepStmtMap.find(q->query)==ths->prepStmtMap.end()) {
-				//fprintf(stdout, "Prepare query....\n");fflush(stdout);
-				ritem->cnt++;
-				ths->prepStmtMap[q->query] = CastUtil::fromNumber(ths->prepStmtMap.size()+1);
-				int qs = PQsendPrepare(ths->conn,ths->prepStmtMap[q->query].c_str(), q->query.c_str(), psize, NULL);
-
-				if (!qs) {
-					fprintf(stderr, "Failed to prepare query %s\n", PQerrorMessage(ths->conn));
-					if(ritem->fcb!=NULL) {
-						ritem->fcb(ritem->ctx, false, NULL, q->query, counter);
-					} else if(q->fcb!=NULL) {
-						q->fcb(q->ctx, false, NULL, q->query, counter);
+			std::map<std::string, std::string>::iterator it;
+			if(q->isPrepared) {
+				if((it = ths->prepStmtMap.find(q->query))==ths->prepStmtMap.end()) {
+					//fprintf(stdout, "Prepare query....\n");fflush(stdout);
+					ritem->cnt++;
+					ths->prepStmtMap[q->query] = CastUtil::fromNumber(ths->prepStmtMap.size()+1);
+					int qs = PQsendPrepare(ths->conn,ths->prepStmtMap[q->query].c_str(), q->query.c_str(), psize, NULL);
+
+					if (!qs) {
+						fprintf(stderr, "Failed to prepare query %s\n", PQerrorMessage(ths->conn));
+						if(ritem->fcb!=NULL) {
+							ritem->fcb(ritem->ctx, false, NULL, q->query, counter);
+						} else if(q->fcb!=NULL) {
+							q->fcb(q->ctx, false, NULL, q->query, counter);
+						}
+						q = NULL;
+						ritem = NULL;
+						ths->pop();
+					} else {
+						flux = true;
+						PQflush(ths->conn);
 					}
-					q = NULL;
-					ritem = NULL;
-					ths->pop();
+					return;
 				} else {
-					flux = true;
-					PQflush(ths->conn);
+					q->prepared = true;
 				}
-				return;
-			} else {
-				q->isPrepared = false;
 			}
 
 
 			int qs = -1;
 			if(q->isMulti) {
 				qs = PQsendQuery(ths->conn, q->query.c_str());
-			} else if(q->isPrepared) {
+			} else if(q->prepared) {
 				if(psize==0) {
-					qs = PQsendQueryPrepared(ths->conn, ths->prepStmtMap[q->query].c_str(), psize, NULL, NULL, NULL, 1);
+					qs = PQsendQueryPrepared(ths->conn, it->second.c_str(), psize, NULL, NULL, NULL, 1);
 				} else {
 					const char *paramValues[psize];
 					int paramLengths[psize];
@@ -715,7 +721,7 @@ void PgReadTask::submit(LibpqAsyncReq* nitem) {
 						}
 						paramBinary[var] = 1;
 					}
-					qs = PQsendQueryPrepared(ths->conn, ths->prepStmtMap[q->query].c_str(), psize, paramValues, paramLengths, paramBinary, 1);
+					qs = PQsendQueryPrepared(ths->conn, it->second.c_str(), psize, paramValues, paramLengths, paramBinary, 1);
 				}
 			} else{
 				if(psize==0) {
@@ -814,7 +820,8 @@ PGresult* LibpqDataSourceImpl::executeSync(LibpqQuery* q) {
 
 	int paramSize = q->pvals.size();
 	if(q->isPrepared) {
-		if(prepStmtMap.find(q->query)==prepStmtMap.end()) {
+		std::map<std::string, std::string>::iterator it;
+		if((it=prepStmtMap.find(q->query))==prepStmtMap.end()) {
 			prepStmtMap[q->query] = CastUtil::fromNumber(prepStmtMap.size()+1);
 			PGresult* res = PQprepare(conn, prepStmtMap[q->query].c_str(), q->query.c_str(), paramSize, NULL);
 			if (PQresultStatus(res) != PGRES_COMMAND_OK) {
@@ -825,7 +832,7 @@ PGresult* LibpqDataSourceImpl::executeSync(LibpqQuery* q) {
 		}
 
 		if(paramSize==0) {
-			PGresult* res = PQexecPrepared(conn, prepStmtMap[q->query].c_str(), 0, NULL, NULL, NULL, 1);
+			PGresult* res = PQexecPrepared(conn, it->second.c_str(), 0, NULL, NULL, NULL, 1);
 			return res;
 		} else {
 			const char *paramValues[paramSize];
@@ -848,7 +855,7 @@ PGresult* LibpqDataSourceImpl::executeSync(LibpqQuery* q) {
 				}
 				paramBinary[var] = 1;
 			}
-			PGresult* res = PQexecPrepared(conn, prepStmtMap[q->query].c_str(), paramSize, paramValues, paramLengths, paramBinary, 1);
+			PGresult* res = PQexecPrepared(conn, it->second.c_str(), paramSize, paramValues, paramLengths, paramBinary, 1);
 			return res;
 		}
 	} else {
@@ -1249,7 +1256,7 @@ void PgBatchReadTask::run() {
 		counter++;
 		q = ritem->peek();
 
-		if(q->isPrepared) {
+		if(q->isPrepared && !q->prepared) {
 			if (PQresultStatus(res) != PGRES_COMMAND_OK) {
 				fprintf(stderr, "PREPARE failed: %s\n", PQerrorMessage(ths->conn));
 				if(ritem->fcb!=NULL) {
@@ -1262,7 +1269,7 @@ void PgBatchReadTask::run() {
 				ths->pop();
 				counter = -1;
 			} else {
-				q->isPrepared = false;
+				q->prepared = true;
 				ritem->cnt--;
 			}
 			PQclear(res);
@@ -1515,34 +1522,37 @@ void PgBatchReadTask::batchQueries(LibpqAsyncReq* nitem, int& numQueriesInBatch)
 			return;
 		}
 
-		if(q->isPrepared && ths->prepStmtMap.find(q->query)==ths->prepStmtMap.end()) {
-			//ths->logger << ("batchQueries:PQsendPrepare\n");
-			nitem->cnt++;
-			ths->prepStmtMap[q->query] = CastUtil::fromNumber(ths->prepStmtMap.size()+1);
-			int qs = PQsendPrepare(ths->conn,ths->prepStmtMap[q->query].c_str(), q->query.c_str(), psize, NULL);
-			//printf("ADD PQsendPrepare to batch\n");
-			numQueriesInBatch++;
+		std::map<std::string, std::string>::iterator it;
+		if(q->isPrepared) {
+			if((it = ths->prepStmtMap.find(q->query))==ths->prepStmtMap.end()) {
+				//ths->logger << ("batchQueries:PQsendPrepare\n");
+				nitem->cnt++;
+				ths->prepStmtMap[q->query] = CastUtil::fromNumber(ths->prepStmtMap.size()+1);
+				int qs = PQsendPrepare(ths->conn,ths->prepStmtMap[q->query].c_str(), q->query.c_str(), psize, NULL);
+				//printf("ADD PQsendPrepare to batch\n");
+				numQueriesInBatch++;
 
-			if (!qs) {
-				fprintf(stderr, "Failed to prepare query %s\n", PQerrorMessage(ths->conn));
-				if(nitem->fcb!=NULL) {
-					nitem->fcb(nitem->ctx, false, NULL, q->query, i+1);
-				} else if(q->fcb!=NULL) {
-					q->fcb(q->ctx, false, NULL, q->query, i+1);
+				if (!qs) {
+					fprintf(stderr, "Failed to prepare query %s\n", PQerrorMessage(ths->conn));
+					if(nitem->fcb!=NULL) {
+						nitem->fcb(nitem->ctx, false, NULL, q->query, i+1);
+					} else if(q->fcb!=NULL) {
+						q->fcb(q->ctx, false, NULL, q->query, i+1);
+					}
+					return;
 				}
-				return;
+			} else {
+				q->prepared = true;
 			}
-		} else {
-			q->isPrepared = false;
 		}
 
 		int qs = -1;
 		if(q->isMulti) {
 			qs = PQsendQueryParams(ths->conn, q->query.c_str(), 0, NULL, NULL, NULL, NULL, 1);
 			//ths->logger << ("batchQueries:ADD PQsendQueryParams to batch\n");
-		} else if(q->isPrepared) {
+		} else if(q->prepared) {
 			if(psize==0) {
-				qs = PQsendQueryPrepared(ths->conn, ths->prepStmtMap[q->query].c_str(), psize, NULL, NULL, NULL, 1);
+				qs = PQsendQueryPrepared(ths->conn, it->second.c_str(), psize, NULL, NULL, NULL, 1);
 			} else {
 				const char *paramValues[psize];
 				int paramLengths[psize];
@@ -1564,7 +1574,7 @@ void PgBatchReadTask::batchQueries(LibpqAsyncReq* nitem, int& numQueriesInBatch)
 					}
 					paramBinary[var] = 1;
 				}
-				qs = PQsendQueryPrepared(ths->conn, ths->prepStmtMap[q->query].c_str(), psize, paramValues, paramLengths, paramBinary, 1);
+				qs = PQsendQueryPrepared(ths->conn, it->second.c_str(), psize, paramValues, paramLengths, paramBinary, 1);
 			}
 		} else{
 			if(psize==0) {
@@ -1725,6 +1735,7 @@ void LibpqQuery::reset() {
 	cbType = 0;
 	isSelect = false;
 	isPrepared = false;
+	prepared = false;
 	isMulti = false;
 	pvals.clear();
 	ctx = NULL;
@@ -1742,6 +1753,7 @@ LibpqQuery::LibpqQuery() {
 	cbType = -1;
 	isSelect = false;
 	isPrepared = false;
+	prepared = false;
 	isMulti = false;
 	ctx = NULL;
 }
diff --git a/src/modules/sdorm/sql/libpq/LibpqDataSourceImpl.h b/src/modules/sdorm/sql/libpq/LibpqDataSourceImpl.h
index 418b5751c..48ccaf976 100644
--- a/src/modules/sdorm/sql/libpq/LibpqDataSourceImpl.h
+++ b/src/modules/sdorm/sql/libpq/LibpqDataSourceImpl.h
@@ -146,6 +146,7 @@ typedef void (*LipqCbFunc6) (void* ctx, int row, int col, char* value);
 class LibpqQuery {
 	std::list<LibpqParam> pvals;
 	bool isPrepared;
+	bool prepared;
 	bool isSelect;
 	bool isMulti;
 	std::string query;
@@ -167,8 +168,8 @@ class LibpqQuery {
 #endif
 public:
 	void reset();
-	LibpqQuery& withSelectQuery(const std::string& query, bool isPrepared = false);
-	LibpqQuery& withUpdateQuery(const std::string& query, bool isPrepared = false);
+	LibpqQuery& withSelectQuery(const std::string& query, bool isPrepared = true);
+	LibpqQuery& withUpdateQuery(const std::string& query, bool isPrepared = true);
 	LibpqQuery& withPrepared();
 	LibpqQuery& withContext(void* ctx);
 	LibpqQuery& withMulti();//multi-statement non parameterized queries
@@ -353,6 +354,8 @@ class LibpqDataSourceImpl : public DataSourceType, public SocketInterface {
 	void postAsync(LibpqAsyncReq* vitem);
 	void postAsync(LibpqAsyncReq* vitem, int numQ);//post async request with n number of multi queries
 	//Asynchronous mode operations, NOT THREAD SAFE
+
+	void handle();
 };
 
 #endif /* LibpqDataSourceIMPL_H_ */
diff --git a/src/modules/server-util/RequestHandler2.cpp b/src/modules/server-util/RequestHandler2.cpp
index ae4c52c3b..4e9685c00 100644
--- a/src/modules/server-util/RequestHandler2.cpp
+++ b/src/modules/server-util/RequestHandler2.cpp
@@ -51,7 +51,7 @@ RequestHandler2* RequestHandler2::getInstance() {
 	return _i;
 }
 
-void RequestHandler2::startNL(unsigned int cid) {
+void RequestHandler2::startNL(unsigned int cid, bool withWQ) {
 	if(run) {
 		return;
 	}
@@ -60,6 +60,10 @@ void RequestHandler2::startNL(unsigned int cid) {
 		run = true;
 		Thread* pthread = new Thread(&handle, this);
 		pthread->execute(cid);
+		if(withWQ) {
+			Thread* pthread = new Thread(&handleWrites, this);
+			pthread->execute(cid);
+		}
 	}
 }
 
@@ -83,7 +87,7 @@ void RequestHandler2::addListenerSocket(doRegisterListener drl, const SOCKET& li
 	selector.addListeningSocket(this->listenerSock);
 }
 
-void RequestHandler2::start(unsigned int cid) {
+void RequestHandler2::start(unsigned int cid, bool withWQ) {
 	if(run) {
 		return;
 	}
@@ -92,6 +96,10 @@ void RequestHandler2::start(unsigned int cid) {
 		selector.initialize(listenerSock, -1);
 		Thread* pthread = new Thread(&handle, this);
 		pthread->execute(cid);
+		if(withWQ) {
+			Thread* pthread = new Thread(&handleWrites, this);
+			pthread->execute(cid);
+		}
 	}
 }
 
@@ -139,14 +147,12 @@ BaseSocket* RequestHandler2::loopEventCb(SelEpolKqEvPrt* ths, BaseSocket* bi, in
 			return si;
 		}
 		case READ_READY: {
-			Http11Socket* si = (Http11Socket*)bi;
-			si->handle();
+			bi->handle();
 			break;
 		}
 		case CLOSED: {
-			Http11Socket* si = (Http11Socket*)bi;
-			si->onClose();
-			ins->shi->closeConnection(si);
+			bi->onClose();
+			ins->shi->closeConnection(bi);
 			break;
 		}
 		case WRITE_READY: {
@@ -198,6 +204,17 @@ void* RequestHandler2::handle(void* inp) {
 	return 0;
 }
 
+void* RequestHandler2::handleWrites(void* inp) {	// Writer-thread entry point; inp is the RequestHandler2 instance. Runs queued callbacks until shi->run clears, then returns 0.
+	RequestHandler2* ins = static_cast<RequestHandler2*>(inp);
+	SockWriteRequest swr;
+	ins->selector.wQ = new moodycamel::BlockingConcurrentQueue<SockWriteRequest>;	// NOTE(review): the queue is created on this thread, so producers can observe wQ == NULL until this line has run
+	while(ins->shi->run) {
+		// Bounded wait (100000 usecs): an untimed wait_dequeue() on an empty queue would never re-check shi->run, so the thread could not stop.
+		if(ins->selector.wQ->wait_dequeue_timed(swr, 100000)) swr.f(swr.bs, swr.arg);
+	}
+	return 0;
+}
+
 Http11Socket::Http11Socket(const SOCKET& fd, const int& chunkSize, const int& connKeepAlive, const int& maxReqHdrCnt, const int& maxEntitySize): BaseSecureSocket(fd) {
 	isHeadersDone = false;
 	bytesToRead = 0;
diff --git a/src/modules/server-util/RequestHandler2.h b/src/modules/server-util/RequestHandler2.h
index ac7f072ea..2fe6a35c5 100644
--- a/src/modules/server-util/RequestHandler2.h
+++ b/src/modules/server-util/RequestHandler2.h
@@ -16,7 +16,6 @@
 #include "HttpRequest.h"
 #include "HttpResponse.h"
 #include "Client.h"
-#include "concurrentqueue.h"
 #include "atomic"
 #ifdef HAVE_SSLINC
 #include "SSLClient.h"
@@ -58,6 +57,7 @@ class RequestHandler2 {
 	std::vector<Http11Socket*> clsdConns;
 	bool isActive();
 	static void* handle(void* inp);
+	static void* handleWrites(void* inp);
 	static RequestHandler2* _i;
 	httpSockHandle hsh;
 	friend class LibpqDataSourceImpl;
@@ -68,8 +68,8 @@ class RequestHandler2 {
 	static BaseSocket* loopEventCb(SelEpolKqEvPrt* ths, BaseSocket* sfd, int type, int fd, char* buf, size_t len, bool isClosed);
 	static void setInstance(RequestHandler2*);
 	static RequestHandler2* getInstance();
-	void start(unsigned int cid);
-	void startNL(unsigned int cid);
+	void start(unsigned int cid, bool withWQ);
+	void startNL(unsigned int cid, bool withWQ);
 	void addListenerSocket(doRegisterListener drl, const SOCKET& listenerSock);
 	void stop(std::string, int, bool);
 	RequestHandler2(ServiceHandler* shi, const bool& isMain, bool isSSLEnabled, httpSockHandle hsh, const SOCKET& listenerSock = INVALID_SOCKET);
diff --git a/src/modules/server-util/SocketInterface.cpp b/src/modules/server-util/SocketInterface.cpp
index 9454f298f..df3583da4 100644
--- a/src/modules/server-util/SocketInterface.cpp
+++ b/src/modules/server-util/SocketInterface.cpp
@@ -89,10 +89,16 @@ SocketInterface::SocketInterface(const SOCKET& fd) : BaseSecureSocket(fd) {
 
 
 BaseSecureSocket::BaseSecureSocket(): BaseSocket() {
+#ifdef HAVE_SSLINC
+	io = NULL;
+	ssl = NULL;
+#endif
 }
 
 BaseSecureSocket::BaseSecureSocket(const SOCKET& fd): BaseSocket(fd) {
 #ifdef HAVE_SSLINC
+	io = NULL;
+	ssl = NULL;
 	init(fd, ssl, io);
 #endif
 }
@@ -271,6 +277,7 @@ int SocketInterface::pushResponse(void* request, void* response, void* context,
 int SocketInterface::startRequest() {
 	int rp = ++reqPos;
 	//wm.lock();
+	//wtl.emplace(rp, ResponseData());
 	wtl[rp] = ResponseData();
 	//wm.unlock();
 	return rp;
diff --git a/src/modules/server-util/SocketInterface.h b/src/modules/server-util/SocketInterface.h
index 02033e0b5..d3ff84e29 100644
--- a/src/modules/server-util/SocketInterface.h
+++ b/src/modules/server-util/SocketInterface.h
@@ -50,9 +50,22 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #endif
+#include "blockingconcurrentqueue.h"
 
 class BaseSocket;
 
+typedef void (*SockWriteRequestF) (BaseSocket* bs, void* arg);
+
+class SockWriteRequest {	// One deferred write: a callback plus the socket and argument it is invoked with. Members are reachable only by the friends below.
+	friend class EventHandler;
+	friend class RequestHandler2;
+	SockWriteRequestF f;	// callback
+	BaseSocket* bs;	// socket handed to the callback
+	void* arg;	// opaque argument handed to the callback
+	SockWriteRequest(): f(NULL), bs(NULL), arg(NULL) {}
+	SockWriteRequest(SockWriteRequestF fn, BaseSocket* sock, void* data): f(fn), bs(sock), arg(data) {}
+};
+
 class ResponseData {
 public:
 	std::string _b;
@@ -62,6 +75,13 @@ class ResponseData {
 };
 
 class EventHandler {
+	moodycamel::BlockingConcurrentQueue<SockWriteRequest>* wQ;
+	template<typename Func1> void queueWrite(Func1 f, BaseSocket* bs, void* arg) {	// Hands f(bs, arg) to the write queue; when no queue exists the callback runs inline on the caller's thread.
+		if(wQ==NULL) f(bs, arg); else wQ->enqueue(SockWriteRequest(f, bs, arg));	// wQ is NULL until a queue is assigned, so guard it; the temporary is already an rvalue, so std::move is unnecessary
+	}
+	friend class BaseSocket;
+	friend class RequestHandler2;
+	friend class RequestReaderHandler;
 public:
 	virtual bool unRegisterWrite(BaseSocket* obj)=0;
 	virtual bool unRegisterRead(const SOCKET& descriptor)=0;
@@ -71,7 +91,13 @@ class EventHandler {
 	virtual void post_write(BaseSocket* sfd, const std::string& data)=0;
 	virtual void post_read(BaseSocket* sfd)=0;
 #endif
-	virtual ~EventHandler(){}
+	EventHandler(): wQ(NULL) {}	// Starts without a write queue.
+	virtual ~EventHandler() {	// Deletes the write queue if one was assigned.
+		if(wQ!=NULL) {
+			delete wQ;
+			wQ = NULL;
+		}
+	}
 };
 
 class BaseSocket {
@@ -117,6 +143,10 @@ class BaseSocket {
 	int writeDirect(const std::string& h, const std::string& d);
 	int writeDirect(const std::string& h, const char* d, size_t len);
 	int writeTo(ResponseData* d);
+	template<typename Func1>
+	void queueWrite(Func1 f, void* arg) {	// Forwards f and arg to the event handler's queueWrite(), passing this socket as the target.
+		eh->queueWrite(f, this, arg);	// NOTE(review): assumes eh has been set on this socket — confirm against callers
+	}
 
 	bool writeFile(int fdes, int remain_data);
 	bool isClosed();
@@ -141,8 +171,12 @@ class BaseSocket {
 	virtual int secureWriteDirect(const char* d, size_t len, int off = 0){return -1;};
 	virtual int secureWriteTo(ResponseData* d){return -1;};
 	virtual int secureReadFrom(){return -1;};
+
+	virtual void handle() {	// Default implementation does nothing; subclasses override it.
+	}
 };
 
+
 class BaseSecureSocket: public BaseSocket {
 protected:
 #ifdef HAVE_SSLINC
diff --git a/src/modules/ssl/SSLCommon.cpp b/src/modules/ssl/SSLCommon.cpp
index 5e239a062..ee44cb2c3 100644
--- a/src/modules/ssl/SSLCommon.cpp
+++ b/src/modules/ssl/SSLCommon.cpp
@@ -244,4 +244,5 @@ void SSLCommon::closeSSL(const int& fd, SSL *ssl, BIO* bio)
 		SSL_free(ssl);
 	}
 	close(fd);
+	BIO_free_all(bio);
 }
diff --git a/src/server/embedded/CHServer.cpp b/src/server/embedded/CHServer.cpp
index be8c46520..f56d9d75a 100644
--- a/src/server/embedded/CHServer.cpp
+++ b/src/server/embedded/CHServer.cpp
@@ -1226,6 +1226,13 @@ void CHServer::serve(std::string port, std::string ipaddr, int thrdpsiz, std::st
 
 	bool isrHandler1 = rHandler=="RequestReaderHandler";
 
+	std::string qw = "false";
+	try {
+		qw = ConfigurationData::getInstance()->coreServerProperties.sprops["QUEUED_WRITES"];
+	} catch(const std::exception& e) {
+	}
+	bool isQueuedWrites = StringUtil::toLowerCopy(qw)=="true";
+
 	//Load all the FFEADContext beans so that the same copy is shared by all process
 	//We need singleton beans so only initialize singletons(controllers,authhandlers,formhandlers..)
 	logger << ("Initializing ffeadContext....") << std::endl;
@@ -1237,7 +1244,7 @@ void CHServer::serve(std::string port, std::string ipaddr, int thrdpsiz, std::st
 	ConfigurationHandler::initializeWsdls();
 	logger << ("Initializing WSDL files done....") << std::endl;
 
-	if(isrHandler1) {
+	if(!isrHandler1) {
 		isSinglEVH = true;
 	}
 
@@ -1259,7 +1266,7 @@ void CHServer::serve(std::string port, std::string ipaddr, int thrdpsiz, std::st
 		RequestHandler2::setInstance(reader);
 		handler->start();
 		reader->registerSocketInterfaceFactory(&CHServer::createSocketInterface2);
-		reader->startNL(-1);
+		reader->startNL(-1, isQueuedWrites);
 		rHandleIns = reader;
 		logger << ("Initializing RequestHandler2....") << std::endl;
 	}
diff --git a/web/te-benchmark-um-pq-async-qw/CMakeLists.txt b/web/te-benchmark-um-pq-async-qw/CMakeLists.txt
new file mode 100644
index 000000000..2b3344134
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Benchmark module library; sources are globbed from this directory's include/ and src/ folders.
+file(GLOB sources
+    "include/*.h"
+    "src/*.cpp"
+)
+if(BUILD_STATIC_LIBS OR EMSCRIPTEN)
+	add_library(te-benchmark-um-pq-async-qw STATIC ${sources})
+else()
+	add_library(te-benchmark-um-pq-async-qw ${sources})
+endif()
+# Headers are attached to the target and located relative to this file, so the path also holds when the project is built as a subproject.
+target_include_directories(te-benchmark-um-pq-async-qw PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
+set_property(TARGET te-benchmark-um-pq-async-qw PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(te-benchmark-um-pq-async-qw ffead-modules ffead-framework ${HAVE_PQLIB} ${HAVE_CURLLIB} ${HAVE_SSLLIB} ${HAVE_MEMCACHEDLIB} ${HAVE_ODBCLIB} ${HAVE_MONGOCLIB} ${HAVE_BSONLIB} ${HAVE_ZLIB} ${HAVE_CRYPTOLIB})
diff --git a/web/te-benchmark-um-pq-async-qw/config/application.xml b/web/te-benchmark-um-pq-async-qw/config/application.xml
new file mode 100644
index 000000000..de9885429
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/config/application.xml
@@ -0,0 +1,24 @@
+<app router="TeBkUmLpqQwAsyncRouter">
+	<cors-config>
+		<allow-origins>*</allow-origins>
+		<allow-methods>GET, POST, HEAD, PUT, DELETE</allow-methods>
+		<allow-headers>content-type, origin</allow-headers>
+		<expose-headers>content-type, origin</expose-headers>
+		<allow-credentials>true</allow-credentials>
+		<max-age>1023</max-age>
+	</cors-config>
+	<cache-control>
+		<control ext="png,css,js,jpeg,jpg,gif" header="Cache-Control"
+			value="max-age=290304000, public" />
+		<control ext="txt,xml,json" header="Cache-Control"
+			value="max-age=172800, public, must-revalidate" />
+		<control ext="html,html" header="Cache-Control"
+			value="max-age=7200, must-revalidate" />
+		<control file="video.mov" header="Expires"
+			value="Thu, 15 Apr 2020 20:00:00 GMT" />
+		<control header="Last-Modified" remove="true" />
+	</cache-control>
+	<templates>
+		<template class="TeBkUmLpqQwAsyncRouter" file="fortunes.tpe" path="fortunes"/>
+	</templates>
+</app>
\ No newline at end of file
diff --git a/web/te-benchmark-um-pq-async-qw/config/sdorm.xml b/web/te-benchmark-um-pq-async-qw/config/sdorm.xml
new file mode 100644
index 000000000..9acb7fb92
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/config/sdorm.xml
@@ -0,0 +1,15 @@
+<sdorm>
+	<data-source>
+		<config>
+			<nodes>
+				<node>
+					<url>host=localhost user=benchmarkdbuser password=benchmarkdbpass dbname=hello_world</url>
+				</node>
+			</nodes>
+			<pool-size>30</pool-size>
+			<name>PostgreSQL-DSN</name>
+			<type>sql-raw-pq</type>
+			<async>true</async>
+		</config>
+	</data-source>
+</sdorm>
diff --git a/web/te-benchmark-um-pq-async-qw/include/TeBkUmLpqQwAsync.h b/web/te-benchmark-um-pq-async-qw/include/TeBkUmLpqQwAsync.h
new file mode 100644
index 000000000..e1b09199c
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/include/TeBkUmLpqQwAsync.h
@@ -0,0 +1,192 @@
+/*
+	Copyright 2009-2020, Sumeet Chhetri
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+*/
+/*
+ * TeBkUmLpqQwAsync.h
+ *
+ *  Created on: 03-Feb-2020
+ *      Author: sumeetc
+ */
+
+#ifndef WEB_TE_BENCHMARK_UM_INCLUDE_TeBkUmLpqQwAsync_H_
+#define WEB_TE_BENCHMARK_UM_INCLUDE_TeBkUmLpqQwAsync_H_
+#include "TemplateHandler.h"
+#include "vector"
+#ifndef OS_MINGW
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#endif
+#include "DataSourceManager.h"
+#include <stdlib.h>
+#include <algorithm>
+#include "CryptoHandler.h"
+#include "vector"
+#include "CastUtil.h"
+#include "CacheManager.h"
+#include <stdlib.h>
+#include "HttpRequest.h"
+#include "HttpResponse.h"
+#include "JSONSerialize.h"
+#include "string"
+#include "yuarel.h"
+#include "Router.h"
+#include <unordered_map>
+#include "ConfigurationData.h"
+
+class TeBkUmLpqQwAsyncWorld;
+
+class TeBkUmLpqQwAsyncWorld {
+	int id;
+	int randomNumber;
+public:
+	TeBkUmLpqQwAsyncWorld();
+	TeBkUmLpqQwAsyncWorld(int id);
+	TeBkUmLpqQwAsyncWorld(int id, int randomNumber);
+	virtual ~TeBkUmLpqQwAsyncWorld();
+	int getId() const;
+	void setId(int id);
+	int getRandomNumber() const;
+	void setRandomNumber(int randomNumber);
+#ifdef HAVE_RAPID_JSON
+	void toJson(rapidjson::Writer<rapidjson::StringBuffer>& w) {
+		w.StartObject();
+		w.String("id", 2);
+	    w.Int(id);
+		w.String("randomNumber", 12);
+	    w.Int(randomNumber);
+		w.EndObject();
+	}
+	// Writes vec as a JSON array, one object per element (see the member toJson above).
+	// Elements are visited by reference so that no World object is copied per iteration.
+	static void toJson(std::vector<TeBkUmLpqQwAsyncWorld>& vec, rapidjson::Writer<rapidjson::StringBuffer>& w) {
+		w.StartArray();
+		for(auto& el: vec) {
+			el.toJson(w);
+		}
+		w.EndArray();
+	}
+#endif
+};
+
+class TeBkUmLpqQwAsyncFortune {	// Fortune record: an id plus message storage; ordering is defined by operator<.
+	int id;
+public:
+	std::string message_i;	// owned string; NOTE(review): presumably backs `message` in some cases — confirm in the .cpp
+	std::string_view message;	// non-owning view; whatever it points at must outlive this object
+	bool allocd;	// NOTE(review): meaning not visible in this header — confirm in the .cpp
+	TeBkUmLpqQwAsyncFortune(int id);
+	TeBkUmLpqQwAsyncFortune(int id, std::string message);
+	TeBkUmLpqQwAsyncFortune();
+	virtual ~TeBkUmLpqQwAsyncFortune();
+	int getId() const;
+	void setId(int id);
+	bool operator < (const TeBkUmLpqQwAsyncFortune& other) const;
+};
+
+class TeBkUmLpqQwAsyncMessage {	// Holds a single message string; serialisable to JSON when HAVE_RAPID_JSON is defined.
+	std::string message;
+public:
+	TeBkUmLpqQwAsyncMessage();
+	TeBkUmLpqQwAsyncMessage(std::string message);
+	virtual ~TeBkUmLpqQwAsyncMessage();
+	const std::string& getMessage() const;
+	void setMessage(const std::string& message);
+#ifdef HAVE_RAPID_JSON
+	void toJson(rapidjson::Writer<rapidjson::StringBuffer>& w) {	// Emits {"message": <message>}
+		w.StartObject();
+		w.String("message", 7);	// 7 == strlen("message")
+	    w.String(message.c_str(), static_cast<rapidjson::SizeType>(message.length()));
+		w.EndObject();
+	}
+#endif
+};
+
+struct AsyncUpdatesReqWq {	// Plain aggregate; no member has an initializer here, so scalar fields are indeterminate until assigned.
+	float httpVers;	// NOTE(review): presumably the request's HTTP version — confirm against the .cpp
+	bool conn_clos;	// NOTE(review): presumably "close connection after responding" — confirm
+	BaseSocket* sif;
+	LibpqDataSourceImpl* sqli;
+	std::vector<TeBkUmLpqQwAsyncWorld> vec;
+};
+
+class TeBkUmLpqQwAsyncRouter : public Router {	// Router for this benchmark app; entry point is route(), database access goes through getDb().
+	static const std::string HELLO_WORLD;
+	static const std::string WORLD;
+	static const std::string WORLD_ONE_QUERY;
+	static const std::string WORLD_ALL_QUERY;
+	static const std::string FORTUNE_ALL_QUERY;
+	static int g_seed;
+
+	static TemplatePtr tmplFunc;
+
+	static Ser m_ser;
+	static Ser w_ser;
+	static SerCont wcont_ser;
+
+	static std::string& getUpdQuery(int count);
+	void dbAsync(BaseSocket* sif);
+	void queriesAsync(const char* q, int ql, BaseSocket* sif);
+	void updatesAsync(const char* q, int ql, AsyncUpdatesReqWq* req);
+	void updatesAsyncb(const char* q, int ql, AsyncUpdatesReqWq* req);
+	void fortunes(BaseSocket* sif);
+
+	void queriesMultiAsync(const char*, int, BaseSocket* sif);
+	void updatesMulti(const char*, int, AsyncUpdatesReqWq*);
+
+	static std::unordered_map<int, std::string> _qC;	// NOTE(review): presumably a cache keyed by query count — confirm in the .cpp
+	LibpqDataSourceImpl* sqli;
+protected:
+	virtual LibpqDataSourceImpl* getDb(int max = 0);	// overridable so subclasses can supply a different data source
+public:
+	TeBkUmLpqQwAsyncRouter& operator=(const TeBkUmLpqQwAsyncRouter& a) {	// assignment copies nothing from a
+		return *this;
+	}
+	TeBkUmLpqQwAsyncRouter(const TeBkUmLpqQwAsyncRouter& a) {	// copy construction does not share a's data source pointer; sqli is reset to NULL
+		sqli = NULL;
+	}
+	TeBkUmLpqQwAsyncRouter();
+	virtual ~TeBkUmLpqQwAsyncRouter();
+	/* These functions are here just for test purposes and serve no purpose START */
+	static void temp() {
+	}
+	virtual void temp1() const {
+	}
+	std::map<std::string, std::string> l(std::map<std::string, std::string> a1, std::map<std::string, std::string> a2) {
+		return std::map<std::string, std::string>();
+	}
+	/* END */
+	bool route(HttpRequest* req, HttpResponse* res, BaseSocket* sif);
+};
+
+class TeBkUmLpqQwAsyncRouterPooled : public TeBkUmLpqQwAsyncRouter {	// Router variant that keeps a vector of data sources and redeclares getDb().
+	LibpqDataSourceImpl* getDb(int max = 0);	// same signature as the base class's virtual getDb()
+	std::atomic<int> opt;
+	bool inited;
+	int maxconns;
+	std::vector<LibpqDataSourceImpl*> pool;
+public:
+	TeBkUmLpqQwAsyncRouterPooled& operator=(const TeBkUmLpqQwAsyncRouterPooled& a) {	// assignment copies nothing from a
+		return *this;
+	}
+	TeBkUmLpqQwAsyncRouterPooled(const TeBkUmLpqQwAsyncRouterPooled& a) {	// a copy starts fresh: counters reset and pool left empty rather than copied from a
+		this->opt = 0;
+		this->inited = false;
+		this->maxconns = 7;
+	}
+	TeBkUmLpqQwAsyncRouterPooled();
+	virtual ~TeBkUmLpqQwAsyncRouterPooled();
+};
+
+#endif /* WEB_TE_BENCHMARK_UM_INCLUDE_TeBkUmLpqQwAsync_H_ */
diff --git a/web/te-benchmark-um-pq-async-qw/meson.build b/web/te-benchmark-um-pq-async-qw/meson.build
new file mode 100644
index 000000000..257ffb916
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/meson.build
@@ -0,0 +1,15 @@
+
+# Meson build for the te-benchmark-um-pq-async-qw web module, produced as a shared library.
+module_includes = ['/usr/local/include','../../src/modules/common','../../src/modules/cache','../../src/modules/cache/memory','../../src/modules/cache/redis',
+	'../../src/modules/cache/memcached','../../src/modules/client-util','../../src/modules/http','../../src/modules/http/http11','../../src/modules/http/http20',
+	'../../src/modules/ioc','../../src/modules/jobs','../../src/modules/reflection','../../src/modules/sdorm','../../src/modules/sdorm/sql','../../src/modules/sdorm/sql/libpq',
+	'../../src/modules/client-util/ssl','../../src/modules/sdorm/mongo','../../src/modules/sdorm/mongo/raw','../../src/modules/search','../../src/modules/serialization',
+	'../../src/modules/serialization/xml','../../src/modules/serialization/json','../../src/modules/server-util','../../src/modules/ssl','../../src/modules/threads',
+	'../../src/framework','include']
+module_includes += global_includes
+module_libs = [global_libs]
+# The source list is the stdout of the meson_grabber command run on 'src/', split on newlines.
+c = run_command(meson_grabber, 'src/')
+module_sources = c.stdout().strip().split('\n')
+shared_library('te-benchmark-um-pq-async-qw', module_sources, include_directories: module_includes, dependencies: module_libs, link_with: [libffeadmodules, libffeadframework],
+	install: true, install_dir: bin_dir+'/lib')
\ No newline at end of file
diff --git a/web/te-benchmark-um-pq-async-qw/src/TeBkUmLpqQwAsync.cpp b/web/te-benchmark-um-pq-async-qw/src/TeBkUmLpqQwAsync.cpp
new file mode 100644
index 000000000..f90f06add
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/src/TeBkUmLpqQwAsync.cpp
@@ -0,0 +1,715 @@
+/*
+	Copyright 2009-2020, Sumeet Chhetri
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+*/
+/*
+ * TeBkUmLpqQwAsyncUm.cpp
+ *
+ *  Created on: 03-Feb-2020
+ *      Author: sumeetc
+ */
+
+#include "TeBkUmLpqQwAsync.h"
+
+// Column accessors for one `world` row.
+int TeBkUmLpqQwAsyncWorld::getId() const { return id; }
+
+void TeBkUmLpqQwAsyncWorld::setId(int id) {
+	this->id = id;
+}
+
+int TeBkUmLpqQwAsyncWorld::getRandomNumber() const { return randomNumber; }
+
+void TeBkUmLpqQwAsyncWorld::setRandomNumber(int randomNumber) {
+	this->randomNumber = randomNumber;
+}
+
+// Row with both columns supplied.
+TeBkUmLpqQwAsyncWorld::TeBkUmLpqQwAsyncWorld(int id, int randomNumber) {
+	setId(id);
+	setRandomNumber(randomNumber);
+}
+
+// Row with only the id known; randomNumber starts at 0.
+TeBkUmLpqQwAsyncWorld::TeBkUmLpqQwAsyncWorld(int id) {
+	setRandomNumber(0);
+	setId(id);
+}
+
+// Zeroed row.
+TeBkUmLpqQwAsyncWorld::TeBkUmLpqQwAsyncWorld() {
+	setId(0);
+	setRandomNumber(0);
+}
+
+// No resources to release.
+TeBkUmLpqQwAsyncWorld::~TeBkUmLpqQwAsyncWorld() {}
+
+// Id accessors for one `fortune` row.
+int TeBkUmLpqQwAsyncFortune::getId() const {
+	return id;
+}
+void TeBkUmLpqQwAsyncFortune::setId(int id) {
+	this->id = id;
+}
+
+TeBkUmLpqQwAsyncFortune::TeBkUmLpqQwAsyncFortune(int id) {
+	this->id = id;
+	allocd = false;
+}
+// Copies `message` into message_i and points the `message` view at that copy; allocd stays false, so the destructor frees nothing.
+TeBkUmLpqQwAsyncFortune::TeBkUmLpqQwAsyncFortune(int id, std::string message) {
+	this->id = id;
+	this->message_i = message;
+	this->message = std::string_view(this->message_i);
+	allocd = false;
+}
+
+TeBkUmLpqQwAsyncFortune::TeBkUmLpqQwAsyncFortune() {
+	id = 0;
+	allocd = false;
+}
+// free()s the buffer behind `message` only when allocd is set and the view is non-empty (presumably a malloc'd buffer from sanitizeHtmlFast - confirm).
+TeBkUmLpqQwAsyncFortune::~TeBkUmLpqQwAsyncFortune() {
+	if(allocd && message.size()>0) {
+		free((void *)message.data());
+	}
+}
+// Orders fortunes by message text; this is what std::list::sort() uses in fortunes().
+bool TeBkUmLpqQwAsyncFortune::operator < (const TeBkUmLpqQwAsyncFortune& other) const {
+	return message.compare(other.message)<0;
+}
+
+TeBkUmLpqQwAsyncMessage::TeBkUmLpqQwAsyncMessage() {
+}
+
+// Keeps its own copy of the given text.
+TeBkUmLpqQwAsyncMessage::TeBkUmLpqQwAsyncMessage(std::string message)
+	: message(message) {
+}
+
+TeBkUmLpqQwAsyncMessage::~TeBkUmLpqQwAsyncMessage() {
+}
+
+const std::string& TeBkUmLpqQwAsyncMessage::getMessage() const { return message; }
+
+// Overwrites the stored text with a copy of `message`.
+void TeBkUmLpqQwAsyncMessage::setMessage(const std::string& message) {
+	this->message.assign(message);
+}
+
+const std::string TeBkUmLpqQwAsyncRouter::HELLO_WORLD = "Hello, World!";
+const std::string TeBkUmLpqQwAsyncRouter::WORLD = "world";
+const std::string TeBkUmLpqQwAsyncRouter::WORLD_ONE_QUERY = "select id,randomnumber from world where id=$1"; // callers add one int4 parameter (the row id) per query
+const std::string TeBkUmLpqQwAsyncRouter::WORLD_ALL_QUERY = "select id,randomnumber from world";
+const std::string TeBkUmLpqQwAsyncRouter::FORTUNE_ALL_QUERY = "select id,message from fortune"; // used by fortunes()
+std::unordered_map<int, std::string> TeBkUmLpqQwAsyncRouter::_qC; // getUpdQuery() cache: row count -> generated update statement
+int TeBkUmLpqQwAsyncRouter::g_seed = 0; // state handed to CommonUtils::fastrand when drawing random ids/values
+
+// Handles /db: fetches one `world` row by a randomly drawn id and replies with it as JSON.
+void TeBkUmLpqQwAsyncRouter::dbAsync(BaseSocket* sif) {
+	LibpqDataSourceImpl* sqli = getDb(5);
+	int rid = CommonUtils::fastrand(g_seed) % 10000 + 1;
+	LibpqAsyncReq* areq = sqli->getAsyncRequest();
+	LibpqQuery* q = areq->getQuery();
+	q->withParamInt4(rid); // the only parameter of WORLD_ONE_QUERY
+#ifdef HAVE_LIBPQ
+	q->withSelectQuery(WORLD_ONE_QUERY, true).withContext(sif).withCb0([](void* ctx, PGresult* res) {
+		BaseSocket* sif = (BaseSocket*)ctx;
+		// Heap-allocated so it can be handed to the queued write below, which deletes it.
+		TeBkUmLpqQwAsyncWorld* wo = new TeBkUmLpqQwAsyncWorld;
+		int cols = PQnfields(res);
+		for (int j = 0; j < cols; ++j) { // column 0 -> id, any other column -> randomNumber; values go through ntohl
+			if(j==0)wo->setId(ntohl(*((uint32_t *) PQgetvalue(res, 0, j))));
+			else wo->setRandomNumber(ntohl(*((uint32_t *) PQgetvalue(res, 0, j))));
+		}
+		// Serialisation and the socket write are deferred to the socket's write queue.
+		sif->queueWrite([](BaseSocket* sif, void* arg) {
+			TeBkUmLpqQwAsyncWorld* wo = (TeBkUmLpqQwAsyncWorld*)arg;
+			HttpResponse r;
+			std::string h;
+	#ifdef HAVE_RAPID_JSON
+			rapidjson::StringBuffer s;
+			rapidjson::Writer<rapidjson::StringBuffer> w(s);
+			wo->toJson(w);
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false, s.GetSize());
+			sif->writeDirect(h, s.GetString(), s.GetSize());
+	#else
+			JSONSerialize::serializeObject(wo, w_ser, r.getContentP());
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON);
+			sif->writeDirect(h, r.getContent());
+	#endif
+			sif->unUse(); // pairs with the sif->use() done in route()
+			delete wo;
+		}, wo);
+	});
+#endif
+	sqli->postAsync(areq);
+}
+
+// Handles /queries: queues `queryCount` single-row selects (one random id each) on one async request
+// and replies with all rows as a JSON array. `q`/`ql` are the raw query-parameter text and its length.
+void TeBkUmLpqQwAsyncRouter::queriesAsync(const char* q, int ql, BaseSocket* sif) {
+	int queryCount = 0;
+	CommonUtils::fastStrToNum(q, ql, queryCount);
+	queryCount = std::max(1, std::min(queryCount, 500)); // clamp to 1..500
+	LibpqDataSourceImpl* sqli = getDb(3);
+	LibpqAsyncReq* areq = sqli->getAsyncRequest();
+	for (int c = 0; c < queryCount; ++c) {
+		int rid = CommonUtils::fastrand(g_seed) % 10000 + 1;
+		LibpqQuery* q = areq->getQuery();
+		q->withParamInt4(rid);
+		q->withSelectQuery(WORLD_ONE_QUERY);
+	}
+#ifdef HAVE_LIBPQ
+	areq->withFinalCb(sif, [](void* ctx, bool status, std::vector<PGresult*>* results, const std::string& q, int counter) {
+		BaseSocket* sif = (BaseSocket*)ctx;
+		std::vector<TeBkUmLpqQwAsyncWorld>* vec = new std::vector<TeBkUmLpqQwAsyncWorld>; // deleted by the queued write below
+		vec->reserve(results->size());
+		for (int i = 0; i < (int)results->size(); ++i) { // one result per query; only row 0 of each is read
+			PGresult* res = results->at(i);
+			int cols = PQnfields(res);
+			for (int j = 0; j < cols; ++j) {
+				if(j==0) vec->emplace_back(ntohl(*((uint32_t *) PQgetvalue(res, 0, j))));
+				else vec->back().setRandomNumber(ntohl(*((uint32_t *) PQgetvalue(res, 0, j))));
+			}
+		}
+		// NOTE(review): `status` is not inspected here; the rows present in `results` are written as-is.
+		sif->queueWrite([](BaseSocket* sif, void* arg) {
+			std::vector<TeBkUmLpqQwAsyncWorld>* vec = (std::vector<TeBkUmLpqQwAsyncWorld>*)arg;
+			HttpResponse r;
+			std::string h;
+	#ifdef HAVE_RAPID_JSON
+			rapidjson::StringBuffer s;
+			rapidjson::Writer<rapidjson::StringBuffer> w(s);
+			TeBkUmLpqQwAsyncWorld::toJson(*vec, w);
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false, s.GetSize());
+			sif->writeDirect(h, s.GetString(), s.GetSize());
+	#else
+			JSONSerialize::serializeObjectCont(vec, wcont_ser, "vector", r.getContentP());
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false);
+			sif->writeDirect(h, r.getContent());
+	#endif
+			sif->unUse(); // pairs with the sif->use() done in route()
+
+			delete vec;
+		}, vec);
+	});
+#endif
+	sqli->postAsync(areq);
+}
+
+// Handles /queriem: same result as queriesAsync, but all selects are concatenated into ONE
+// ';'-separated statement string with the ids inlined, and sent as a single multi query.
+void TeBkUmLpqQwAsyncRouter::queriesMultiAsync(const char* q, int ql, BaseSocket* sif) {
+	int queryCount = 0;
+	CommonUtils::fastStrToNum(q, ql, queryCount);
+	queryCount = std::max(1, std::min(queryCount, 500)); // clamp to 1..500
+	LibpqDataSourceImpl* sqli = getDb(3);
+	std::stringstream ss;
+	for (int c = 0; c < queryCount; ++c) {
+		int rid = CommonUtils::fastrand(g_seed) % 10000 + 1;
+		ss << "select id, randomnumber from world where id = " << rid << ";";
+	}
+
+	LibpqAsyncReq* areq = sqli->getAsyncRequest();
+	LibpqQuery* qu = areq->getQuery();
+	qu->withSelectQuery(ss.str()).withMulti();
+#ifdef HAVE_LIBPQ
+	areq->withFinalCb(sif, [](void* ctx, bool status, std::vector<PGresult*>* results, const std::string& q, int counter) {
+		BaseSocket* sif = (BaseSocket*)ctx;
+		std::vector<TeBkUmLpqQwAsyncWorld>* vec = new std::vector<TeBkUmLpqQwAsyncWorld>; // deleted by the queued write below
+		vec->reserve(results->size());
+		for (int i = 0; i < (int)results->size(); ++i) {
+			PGresult* res = results->at(i);
+			int cols = PQnfields(res);
+			for (int j = 0; j < cols; ++j) {
+				// Unlike queriesAsync, values are parsed from their text form (pointer + length).
+				int tmp = 0;
+				CommonUtils::fastStrToNum(PQgetvalue(res, 0, j), PQgetlength(res, 0, j), tmp);
+				if(j==0) vec->emplace_back(tmp);
+				else vec->back().setRandomNumber(tmp);
+			}
+		}
+		sif->queueWrite([](BaseSocket* sif, void* arg) {
+			std::vector<TeBkUmLpqQwAsyncWorld>* vec = (std::vector<TeBkUmLpqQwAsyncWorld>*)arg;
+
+			HttpResponse r;
+			std::string h;
+	#ifdef HAVE_RAPID_JSON
+			rapidjson::StringBuffer s;
+			rapidjson::Writer<rapidjson::StringBuffer> w(s);
+			TeBkUmLpqQwAsyncWorld::toJson(*vec, w);
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false, s.GetSize());
+			sif->writeDirect(h, s.GetString(), s.GetSize());
+	#else
+			JSONSerialize::serializeObjectCont(vec, wcont_ser, "vector", r.getContentP());
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false);
+			sif->writeDirect(h, r.getContent());
+	#endif
+			sif->unUse(); // pairs with the sif->use() done in route()
+
+			delete vec;
+		}, vec);
+	});
+#endif
+	sqli->postAsync(areq, queryCount); // presumably the number of result sets to expect - confirm against LibpqDataSourceImpl
+}
+
+// Handles /updatem. Stage 1 reads `queryCount` random rows with one multi-statement select; stage 2 (started from
+// stage 1's completion callback) sends one "begin;update ...;commit;" group per row. Takes ownership of `req`.
+void TeBkUmLpqQwAsyncRouter::updatesMulti(const char* q, int ql, AsyncUpdatesReqWq* req) {
+	int queryCount = 0;
+	CommonUtils::fastStrToNum(q, ql, queryCount);
+	queryCount = std::max(1, std::min(queryCount, 500)); // clamp to 1..500
+	req->vec.reserve(queryCount);
+	req->sqli = getDb(3);
+
+	std::stringstream ss;
+	for (int c = 0; c < queryCount; ++c) {
+		int rid = CommonUtils::fastrand(g_seed) % 10000 + 1;
+		ss << "select id, randomnumber from world where id = " << rid << ";";
+	}
+	//req->ss << "begin;";//NEVER USE - this creates a deadlock issue (like, DETAIL:  Process 16 waits for ShareLock on transaction 995; blocked by process 19.)
+	LibpqAsyncReq* areq = req->sqli->getAsyncRequest();
+	LibpqQuery* qu = areq->getQuery();
+	qu->withSelectQuery(ss.str()).withMulti();
+#ifdef HAVE_LIBPQ
+	// Queued-write handler shared by every exit path: JSON array of req->vec, or a 500 when it is empty; then releases the socket and deletes req.
+	static const auto sendReply = [](BaseSocket* sif, void* arg) {
+		AsyncUpdatesReqWq* req = (AsyncUpdatesReqWq*)arg;
+		HttpResponse r;
+		std::string h;
+		if(req->vec.size()>0) {
+	#ifdef HAVE_RAPID_JSON
+			rapidjson::StringBuffer s;
+			rapidjson::Writer<rapidjson::StringBuffer> w(s);
+			TeBkUmLpqQwAsyncWorld::toJson(req->vec, w);
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false, s.GetSize());
+			req->sif->writeDirect(h, s.GetString(), s.GetSize());
+	#else
+			JSONSerialize::serializeObjectCont(&req->vec, wcont_ser, "vector", r.getContentP());
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false);
+			req->sif->writeDirect(h, r.getContent());
+	#endif
+		} else {
+			r.httpStatus(HTTPResponseStatus::InternalServerError).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, req->httpVers, true);
+			req->sif->writeDirect(h);
+		}
+		req->sif->unUse();
+		delete req;
+	};
+	areq->withFinalCb(req, [](void* ctx, bool status, std::vector<PGresult*>* results, const std::string& q, int counter) {
+		AsyncUpdatesReqWq* req = (AsyncUpdatesReqWq*)ctx;
+		if(!status) {
+			// Stage 1 failed. Previously nothing was written on this path, which leaked `req` and never
+			// released the socket. req->vec is still empty here, so sendReply answers with a 500.
+			req->sif->queueWrite(sendReply, ctx);
+			return;
+		}
+		int queryCount = (int)results->size();
+		std::stringstream ss;
+		for (int i = 0; i < queryCount; ++i) {
+			PGresult* res = results->at(i);
+			int cols = PQnfields(res);
+			for (int j = 0; j < cols; ++j) {
+				int tmp = 0;
+				CommonUtils::fastStrToNum(PQgetvalue(res, 0, j), PQgetlength(res, 0, j), tmp);
+				if(j==0) req->vec.emplace_back(tmp);
+				else {
+					TeBkUmLpqQwAsyncWorld& w = req->vec.back();
+					int newRandomNumber = CommonUtils::fastrand(g_seed) % 10000 + 1;
+					if(tmp == newRandomNumber) { // make sure the stored value really changes
+						newRandomNumber += 1;
+						if(newRandomNumber>=10000) {
+							newRandomNumber = 1;
+						}
+					}
+					w.setRandomNumber(newRandomNumber);
+					ss << "begin;update world set randomnumber = " << newRandomNumber << " where id = " << w.getId() << ";commit;";
+				}
+			}
+		}
+		LibpqAsyncReq* areq = req->sqli->getAsyncRequest();
+		LibpqQuery* qu = areq->getQuery();
+		qu->withUpdateQuery(ss.str()).withMulti();
+		areq->withFinalCb(req, [](void* ctx, bool status, std::vector<PGresult*>* results, const std::string& q, int counter) {
+			((AsyncUpdatesReqWq*)ctx)->sif->queueWrite(sendReply, ctx);
+		});
+		req->sqli->postAsync(areq, queryCount*3); // three statements (begin, update, commit) per row
+	});
+#endif
+	req->sqli->postAsync(areq, queryCount);
+}
+
+// Returns the bulk-update statement for `count` rows, generating and caching it in _qC on first use:
+//   update world as t set randomnumber = case id when $1 then $2 ... else randomnumber end where id in ($k,...)
+// The returned reference points into the static _qC map.
+std::string& TeBkUmLpqQwAsyncRouter::getUpdQuery(int count) {
+	auto cached = _qC.find(count);
+	if(cached!=_qC.end()) return cached->second;
+
+	int placeholder = 1; // index of the next $n parameter
+	std::string sql = "update world as t set randomnumber = case id";
+	for (int row = 0; row < count; ++row) {
+		sql += " when $" + std::to_string(placeholder);
+		sql += " then $" + std::to_string(placeholder + 1);
+		placeholder += 2;
+	}
+	sql += " else randomnumber end where id in (";
+	for (int row = 0; row < count; ++row) {
+		sql += "$" + std::to_string(placeholder++) + ",";
+	}
+	// Drop the final character (the trailing comma) and close the list.
+	sql.erase(sql.length()-1);
+	sql += ")";
+
+	// Store the finished text under `count` and hand back the cached copy.
+	std::string& slot = _qC[count];
+	slot = std::move(sql);
+	return slot;
+}
+// Handles /bupdates and /updates: reads `queryCount` random rows with single-row selects, then rewrites all
+// of them with ONE parameterised "update ... case id when $n then $m ..." statement from getUpdQuery().
+// Takes ownership of `req`; the queued write at the end deletes it.
+void TeBkUmLpqQwAsyncRouter::updatesAsyncb(const char* q, int ql, AsyncUpdatesReqWq* req) {
+	int queryCount = 0;
+	CommonUtils::fastStrToNum(q, ql, queryCount);
+	queryCount = std::max(1, std::min(queryCount, 500)); // clamp to 1..500
+	req->vec.reserve(queryCount);
+	req->sqli = getDb(3);
+
+	LibpqAsyncReq* areq = req->sqli->getAsyncRequest();
+	for (int c = 0; c < queryCount; ++c) {
+		int rid = CommonUtils::fastrand(g_seed) % 10000 + 1;
+		LibpqQuery* q = areq->getQuery();
+		q->withParamInt4(rid);
+		q->withSelectQuery(WORLD_ONE_QUERY);
+	}
+#ifdef HAVE_LIBPQ
+	areq->withFinalCb(req, [](void* ctx, bool status, std::vector<PGresult*>* results, const std::string& query, int counter) {
+		AsyncUpdatesReqWq* req = (AsyncUpdatesReqWq*)ctx;
+		// NOTE(review): `status` is not checked here; the update is built from whatever `results` holds.
+		int queryCount = (int)results->size();
+		LibpqAsyncReq* areq = req->sqli->getAsyncRequest();
+		req->sqli->beginAsync(areq);
+		LibpqQuery* q = areq->getQuery();
+		q->withUpdateQuery(getUpdQuery(queryCount)).withContext(req);
+
+		for (int i = 0; i < queryCount; ++i) {
+			PGresult* res = results->at(i);
+			int cols = PQnfields(res);
+			for (int j = 0; j < cols; ++j) {
+				if(j==0) req->vec.emplace_back(ntohl(*((uint32_t *) PQgetvalue(res, 0, j))));
+				else {
+					int tmp = ntohl(*((uint32_t *) PQgetvalue(res, 0, j)));
+					TeBkUmLpqQwAsyncWorld& w = req->vec.back();
+					int newRandomNumber = CommonUtils::fastrand(g_seed) % 10000 + 1;
+					if(tmp == newRandomNumber) { // make sure the stored value really changes
+						newRandomNumber += 1;
+						if(newRandomNumber>=10000) {
+							newRandomNumber = 1;
+						}
+					}
+					w.setRandomNumber(newRandomNumber);
+					q->withParamInt4(w.getId()); // (id, new value) pair for the "when $n then $m" placeholders
+					q->withParamInt4(w.getRandomNumber());
+				}
+			}
+		}
+		for(auto w: req->vec) { // the ids once more, for the trailing "where id in (...)" placeholders
+			q->withParamInt4(w.getId());
+		}
+		req->sqli->commitAsync(areq);
+		areq->withFinalCb(req, [](void* ctx, bool status, std::vector<PGresult*>* results, const std::string& query, int counter) {
+			AsyncUpdatesReqWq* req = (AsyncUpdatesReqWq*)ctx;
+			req->sif->queueWrite([](BaseSocket* sif, void* arg) {
+				AsyncUpdatesReqWq* req = (AsyncUpdatesReqWq*)arg;
+				if(req->vec.size()>0) {
+					HttpResponse r;
+					std::string h;
+	#ifdef HAVE_RAPID_JSON
+					rapidjson::StringBuffer s;
+					rapidjson::Writer<rapidjson::StringBuffer> w(s);
+					TeBkUmLpqQwAsyncWorld::toJson(req->vec, w);
+					r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false, s.GetSize());
+					req->sif->writeDirect(h, s.GetString(), s.GetSize());
+	#else
+					JSONSerialize::serializeObjectCont(&req->vec, wcont_ser, "vector", r.getContentP());
+					r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false);
+					req->sif->writeDirect(h, r.getContent());
+	#endif
+				} else { // no row was collected: answer with a 500
+					HttpResponse r;
+					std::string h;
+					r.httpStatus(HTTPResponseStatus::InternalServerError).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, req->httpVers, true);
+					req->sif->writeDirect(h);
+				}
+				req->sif->unUse(); // pairs with the sif->use() done in route()
+				delete req;
+			}, ctx);
+		});
+		req->sqli->postAsync(areq);
+	});
+#endif
+	req->sqli->postAsync(areq);
+}
+
+// Handles /update_: reads `queryCount` random rows with single-row selects, then queues one
+// begin / "update world set randomnumber = ... where id = ..." / commit group per row on a second request.
+// Takes ownership of `req`; the queued write at the end deletes it.
+void TeBkUmLpqQwAsyncRouter::updatesAsync(const char* q, int ql, AsyncUpdatesReqWq* req) {
+	int queryCount = 0;
+	CommonUtils::fastStrToNum(q, ql, queryCount);
+	queryCount = std::max(1, std::min(queryCount, 500)); // clamp to 1..500
+	req->vec.reserve(queryCount);
+	req->sqli = getDb(3);
+
+	LibpqAsyncReq* areq = req->sqli->getAsyncRequest();
+	for (int c = 0; c < queryCount; ++c) {
+		int rid = CommonUtils::fastrand(g_seed) % 10000 + 1;
+		LibpqQuery* qu = areq->getQuery();
+		qu->withParamInt4(rid);
+		qu->withSelectQuery(WORLD_ONE_QUERY);
+	}
+#ifdef HAVE_LIBPQ
+	areq->withFinalCb(req, [](void* ctx, bool status, std::vector<PGresult*>* results, const std::string& query, int counter) {
+		AsyncUpdatesReqWq* req = (AsyncUpdatesReqWq*)ctx;
+		LibpqAsyncReq* areq = req->sqli->getAsyncRequest(); // second-stage request that collects the updates
+
+		for (int i = 0; i < (int)results->size(); ++i) {
+			PGresult* res = results->at(i);
+			int cols = PQnfields(res);
+			for (int j = 0; j < cols; ++j) {
+				if(j==0) req->vec.emplace_back(ntohl(*((uint32_t *) PQgetvalue(res, 0, j))));
+				else {
+					int tmp = ntohl(*((uint32_t *) PQgetvalue(res, 0, j)));
+					TeBkUmLpqQwAsyncWorld& w = req->vec.back();
+					int newRandomNumber = CommonUtils::fastrand(g_seed) % 10000 + 1;
+					if(tmp == newRandomNumber) { // make sure the stored value really changes
+						newRandomNumber += 1;
+						if(newRandomNumber>=10000) {
+							newRandomNumber = 1;
+						}
+					}
+					w.setRandomNumber(newRandomNumber);
+					std::stringstream ss;
+					ss << "update world set randomnumber = " << newRandomNumber << " where id = " << w.getId();
+					// Each row's update is bracketed by its own beginAsync/commitAsync on the shared request.
+					req->sqli->beginAsync(areq);
+					LibpqQuery* q = areq->getQuery();
+					q->withUpdateQuery(ss.str(), false); // NOTE(review): meaning of the `false` flag is not visible here - confirm
+					req->sqli->commitAsync(areq);
+				}
+			}
+		}
+
+		areq->withFinalCb(req, [](void* ctx, bool status, std::vector<PGresult*>* results, const std::string& query, int counter) {
+			AsyncUpdatesReqWq* req = (AsyncUpdatesReqWq*)ctx;
+			req->sif->queueWrite([](BaseSocket* sif, void* arg) {
+				AsyncUpdatesReqWq* req = (AsyncUpdatesReqWq*)arg;
+				if(req->vec.size()>0) {
+					HttpResponse r;
+					std::string h;
+	#ifdef HAVE_RAPID_JSON
+					rapidjson::StringBuffer s;
+					rapidjson::Writer<rapidjson::StringBuffer> w(s);
+					TeBkUmLpqQwAsyncWorld::toJson(req->vec, w);
+					r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false, s.GetSize());
+					req->sif->writeDirect(h, s.GetString(), s.GetSize());
+	#else
+					JSONSerialize::serializeObjectCont(&req->vec, wcont_ser, "vector", r.getContentP());
+					r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, 1.1, false);
+					req->sif->writeDirect(h, r.getContent());
+	#endif
+				} else { // no row was collected: answer with a 500
+					HttpResponse r;
+					std::string h;
+					r.httpStatus(HTTPResponseStatus::InternalServerError).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_APPLICATION_JSON, req->httpVers, true);
+					req->sif->writeDirect(h);
+				}
+				req->sif->unUse(); // pairs with the sif->use() done in route()
+				delete req;
+			}, ctx);
+		});
+		req->sqli->postAsync(areq);
+	});
+#endif
+	req->sqli->postAsync(areq);
+}
+
+// Handles /fortunes: loads every fortune row, appends the request-time fortune, sorts the list
+// and renders it through the fortunes template as an HTML response.
+void TeBkUmLpqQwAsyncRouter::fortunes(BaseSocket* sif) {
+	LibpqDataSourceImpl* sqli = getDb(7);
+	LibpqAsyncReq* areq = sqli->getAsyncRequest();
+	LibpqQuery* q = areq->getQuery();
+#ifdef HAVE_LIBPQ
+	q->withSelectQuery(FORTUNE_ALL_QUERY).withContext(sif).withCb0([](void* ctx, PGresult* res) {
+		BaseSocket* sif = (BaseSocket*)ctx;
+		std::list<TeBkUmLpqQwAsyncFortune>* flst = new std::list<TeBkUmLpqQwAsyncFortune>; // deleted by the queued write below
+		int cols = PQnfields(res);
+		int rows = PQntuples(res);
+		for(int i=0; i<rows; i++) {
+			for (int j = 0; j < cols; ++j) {
+				if(j==0) {
+					flst->emplace_back(ntohl(*((uint32_t *) PQgetvalue(res, i, j))));
+				} else {
+					TeBkUmLpqQwAsyncFortune& w = flst->back();
+					w.message = CryptoHandler::sanitizeHtmlFast((const uint8_t *)PQgetvalue(res, i, j), (size_t)PQgetlength(res, i, j), w.message_i, w.allocd);
+				}
+			}
+		}
+
+		// NOTE(review): sanitizeHtmlFast presumably HTML-escapes the text and reports via w.allocd whether it allocated the buffer - confirm.
+		sif->queueWrite([](BaseSocket* sif, void* arg) {
+			std::list<TeBkUmLpqQwAsyncFortune>* flst = (std::list<TeBkUmLpqQwAsyncFortune>*)arg;
+			Context context;
+			// sort() relies on TeBkUmLpqQwAsyncFortune::operator<, i.e. orders by message text.
+			flst->emplace_back(0, "Additional fortune added at request time.");
+			flst->sort();
+			context.emplace("fortunes", flst);
+
+			fcpstream str;
+			tmplFunc(&context, str);
+			std::string out = str.str();
+			HttpResponse r;
+			std::string h;
+			r.httpStatus(HTTPResponseStatus::Ok).generateHeadResponse(h, ContentTypes::CONTENT_TYPE_TEXT_HTML, 1.1, false, (int)out.length());
+			sif->writeDirect(h, out);
+			sif->unUse(); // pairs with the sif->use() done in route()
+
+			delete flst;
+		}, flst);
+	});
+#endif
+	sqli->postAsync(areq);
+}
+
+// Dispatches a request to the async handler selected by the suffix of its path.
+// The socket is marked in use here; every handler's queued write (or the 404 branch below)
+// issues the matching sif->unUse(). Always returns false.
+bool TeBkUmLpqQwAsyncRouter::route(HttpRequest* req, HttpResponse* res, BaseSocket* sif) {
+	sif->use();
+	// Zero-initialised: when the query string yields no parameter the handlers now receive
+	// (NULL, 0) instead of an indeterminate pointer/length pair read from the stack.
+	struct yuarel_param params[1] = {};
+	// Builds the heap request object for the update handlers, which own it and delete it.
+	auto newUpdatesReq = [&]() {
+		AsyncUpdatesReqWq* ar = new AsyncUpdatesReqWq;
+		ar->sif = sif;
+		ar->httpVers = req->getHttpVers();
+		ar->conn_clos = req->isClose();
+		return ar;
+	};
+
+	if(StringUtil::endsWith(req->getPath(), "/db")) {
+		dbAsync(sif);
+	} else if(StringUtil::endsWith(req->getPath(), "/queries")) {
+		yuarel_parse_query((char*)req->getQueryStr().data(), req->getQueryStr().size(), params, 1);
+		queriesAsync(params[0].val, params[0].val_len, sif);
+	} else if(StringUtil::endsWith(req->getPath(), "/queriem")) {
+		yuarel_parse_query((char*)req->getQueryStr().data(), req->getQueryStr().size(), params, 1);
+		queriesMultiAsync(params[0].val, params[0].val_len, sif);
+	} else if(StringUtil::endsWith(req->getPath(), "/updatem")) {
+		yuarel_parse_query((char*)req->getQueryStr().data(), req->getQueryStr().size(), params, 1);
+		updatesMulti(params[0].val, params[0].val_len, newUpdatesReq());
+	} else if(StringUtil::endsWith(req->getPath(), "/fortunes")) {
+		fortunes(sif);
+	} else if(StringUtil::endsWith(req->getPath(), "/bupdates") || StringUtil::endsWith(req->getPath(), "/updates")) {
+		yuarel_parse_query((char*)req->getQueryStr().data(), req->getQueryStr().size(), params, 1);
+		updatesAsyncb(params[0].val, params[0].val_len, newUpdatesReq());
+	} else if(StringUtil::endsWith(req->getPath(), "/update_")) {
+		yuarel_parse_query((char*)req->getQueryStr().data(), req->getQueryStr().size(), params, 1);
+		updatesAsync(params[0].val, params[0].val_len, newUpdatesReq());
+	} else {
+		// Unknown path: answer 404 immediately and release the socket ourselves.
+		std::string h;
+		res->httpStatus(HTTPResponseStatus::NotFound).generateHeadResponse(h, req->getHttpVers(), true);
+		sif->writeDirect(h);
+		sif->unUse();
+	}
+	return false;
+}
+
+TemplatePtr TeBkUmLpqQwAsyncRouter::tmplFunc; // fortunes template entry point, assigned in the constructor
+Ser TeBkUmLpqQwAsyncRouter::m_ser; // serializer hooks below are assigned in the constructor
+Ser TeBkUmLpqQwAsyncRouter::w_ser;
+SerCont TeBkUmLpqQwAsyncRouter::wcont_ser;
+
+// Starts without a connection and (re)assigns the static template and serializer hooks for this app.
+TeBkUmLpqQwAsyncRouter::TeBkUmLpqQwAsyncRouter() {
+	sqli = NULL;
+	tmplFunc = TemplateUtil::getTemplateFunc("te-benchmark-um-pq-async-qw", "tpe/fortunes.tpe");
+	m_ser = Serializer::getSerFuncForObject("te-benchmark-um-pq-async-qw", "TeBkUmLpqQwAsyncMessage");
+	w_ser = Serializer::getSerFuncForObject("te-benchmark-um-pq-async-qw", "TeBkUmLpqQwAsyncWorld");
+	wcont_ser = Serializer::getSerFuncForObjectCont("te-benchmark-um-pq-async-qw", "TeBkUmLpqQwAsyncWorld", "std::vector");
+}
+// Hands the lazily created connection, if there is one, back to the DataSourceManager.
+TeBkUmLpqQwAsyncRouter::~TeBkUmLpqQwAsyncRouter() {
+	if(sqli!=NULL) {
+		DataSourceManager::cleanRawImpl(sqli);
+	}
+}
+// Returns the router's single connection, creating it on first use; `max` is not used by this implementation.
+LibpqDataSourceImpl* TeBkUmLpqQwAsyncRouter::getDb(int max) {
+	if(sqli==NULL) {
+		sqli = static_cast<LibpqDataSourceImpl*>(DataSourceManager::getRawImpl("PostgreSQL-DSN", "te-benchmark-um-pq-async-qw"));
+	}
+	return sqli;
+}
+
+// Hands out pooled connections in rotation. `max` restricts the rotation to the first `max`
+// slots (0 means the whole pool) and is never allowed to exceed maxconns.
+LibpqDataSourceImpl* TeBkUmLpqQwAsyncRouterPooled::getDb(int max) {
+	const int span = (max==0) ? maxconns : std::min(max, maxconns);
+	int slot = 0;
+	if(!inited) {
+		// First call: open all maxconns connections; this call is served from slot 0.
+		for (int created = 0; created < maxconns; ++created) {
+			pool.push_back(static_cast<LibpqDataSourceImpl*>(DataSourceManager::getRawImpl("PostgreSQL-DSN", "te-benchmark-um-pq-async-qw", true)));
+		}
+		inited = true;
+	} else {
+		slot = ++opt;
+		// Restart the shared counter before it can reach INT_MAX.
+		if(slot>=INT_MAX-1) {
+			opt = 0;
+		}
+	}
+	return pool.at(slot%span);
+}
+
+// Sizes the pool from the optional "dbpoolsize" application property. The default of 7 is kept when the
+// property is missing, unparsable or not positive (getDb() computes pc%max, so 0 would divide by zero).
+TeBkUmLpqQwAsyncRouterPooled::TeBkUmLpqQwAsyncRouterPooled() {
+	maxconns = 7;
+	inited = false;
+	opt = 0;
+	propMap props = ConfigurationData::getAppProperties();
+	auto it = props.find("dbpoolsize");
+	if(it==props.end()) return;
+	try {
+		int configured = CastUtil::toInt(it->second);
+		if(configured>0) maxconns = configured;
+	} catch(...) {} // best effort: an unparsable value keeps the default
+}
+
+// Hands every non-null pooled connection back to the DataSourceManager.
+TeBkUmLpqQwAsyncRouterPooled::~TeBkUmLpqQwAsyncRouterPooled() {
+	for(size_t i = 0; i < pool.size(); ++i) {
+		LibpqDataSourceImpl* conn = pool[i];
+		if(conn!=NULL) DataSourceManager::cleanRawImpl(conn);
+	}
+}
diff --git a/web/te-benchmark-um-pq-async-qw/src/autotools/Makefile.am b/web/te-benchmark-um-pq-async-qw/src/autotools/Makefile.am
new file mode 100644
index 000000000..22bc8aed2
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/src/autotools/Makefile.am
@@ -0,0 +1,17 @@
+AUTOMAKE_OPTIONS = subdir-objects
+ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS}
+
+AM_CPPFLAGS=-I"../../../../include" -I"../../include"
+
+packageIdentifier=${PACKAGE_NAME}-${PACKAGE_VERSION}-bin
+distdir=${PACKAGE_NAME}-${PACKAGE_VERSION}-src
+fprefix=../../../../${packageIdentifier}
+prefix=${abs_builddir}
+
+lib_LTLIBRARIES = libte-benchmark-um-pq-async-qw.la
+# Per-library variables use the canonicalized library name (characters other than letters, digits, '@' and '_' become '_'), so the "-qw" part must be included.
+libte_benchmark_um_pq_async_qw_la_SOURCES = ../TeBkUmLpqQwAsync.cpp
+libte_benchmark_um_pq_async_qw_la_LDFLAGS = -no-undefined
+libte_benchmark_um_pq_async_qw_la_LIBADD = -L"${fprefix}/lib" -lffead-modules -lffead-framework
+
+#dist_noinst_SCRIPTS = autogen.sh
\ No newline at end of file
diff --git a/web/te-benchmark-um-pq-async-qw/tpe/fortunes.tpe b/web/te-benchmark-um-pq-async-qw/tpe/fortunes.tpe
new file mode 100644
index 000000000..805c3730a
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/tpe/fortunes.tpe
@@ -0,0 +1,13 @@
+#declareref std::list<TeBkUmLpqQwAsyncFortune>* fortunes#
+<!DOCTYPE html>
+<html>
+<head><title>Fortunes</title></head>
+<body>
+<table>
+<tr><th>id</th><th>message</th></tr>
+#for(std::list<TeBkUmLpqQwAsyncFortune>::iterator it=fortunes->begin(); it != fortunes->end(); ++it)#
+<tr><td>${(*it).getId()}</td><td>${(*it).message}</td></tr>
+#rof#
+</table>
+</body>
+</html>
\ No newline at end of file
diff --git a/web/te-benchmark-um-pq-async-qw/xmake.lua b/web/te-benchmark-um-pq-async-qw/xmake.lua
new file mode 100644
index 000000000..2301f46fc
--- /dev/null
+++ b/web/te-benchmark-um-pq-async-qw/xmake.lua
@@ -0,0 +1,12 @@
+add_includedirs("include/")
+-- Install directory used by the target below: the 6.0 binary directory under the project root.
+local bindir = "$(projectdir)/ffead-cpp-6.0-bin"
+-- Shared-library target for this web module, compiled from every .cpp file under src/.
+target("te-benchmark-um-pq-async-qw")
+	set_languages("c++17")
+	add_deps("ffead-framework")
+	add_options(getOptions()) -- getOptions and setIncludes are not defined in this file; presumably provided by the root xmake.lua - confirm
+	set_kind("shared")
+	on_load(setIncludes)
+	add_files("src/*.cpp")
+	set_installdir(bindir)
diff --git a/xmake.lua b/xmake.lua
index 9c953fa09..14a684e4f 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -685,6 +685,7 @@ includes("web/te-benchmark-um")
 includes("web/te-benchmark-um-mgr")
 includes("web/te-benchmark-um-pq")
 includes("web/te-benchmark-um-pq-async")
+includes("web/te-benchmark-um-pq-async-qw")
 
 target("ffead-cpp")
 	set_languages("c++17")