
Fixes changes in 3.7 Multiprocessing for Import/Export and dump/restore #106


Merged: 9 commits, Apr 21, 2019
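For context (a summary, not text from the PR): on Python 3, multiprocessing.queues.SimpleQueue requires an explicit ctx argument, while on Python 2 it takes no constructor arguments, so the diff consistently gates the constructor on six.PY3 and spells out the multiprocessing module name instead of the old mp alias. A minimal sketch of the compatibility pattern that repeats throughout these files, assuming six is installed:

import multiprocessing
from multiprocessing.queues import SimpleQueue

import six

if six.PY3:
    # Python 3: the queues-module class needs an explicit context
    ctx = multiprocessing.get_context(multiprocessing.get_start_method())
    error_queue = SimpleQueue(ctx=ctx)
else:
    # Python 2: SimpleQueue takes no constructor arguments
    error_queue = SimpleQueue()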
1 change: 0 additions & 1 deletion Makefile
@@ -55,7 +55,6 @@ test-ci:
 	@killall rebirthdb
 
 test-remote:
-	curl -qo ${REMOTE_TEST_SETUP_NAME} ${REMOTE_TEST_SETUP_URL}
 	python ${REMOTE_TEST_SETUP_NAME} pytest -m integration
 
 install-db:
2 changes: 0 additions & 2 deletions requirements-dev.txt

This file was deleted.

43 changes: 26 additions & 17 deletions rethinkdb/_export.py
@@ -23,7 +23,7 @@
 import ctypes
 import datetime
 import json
-import multiprocessing as mp
+import multiprocessing
 import numbers
 import optparse
 import os
@@ -35,6 +35,8 @@
 import traceback
 from multiprocessing.queues import SimpleQueue
 
+import six
+
 from rethinkdb import errors, query, utils_common
 from rethinkdb.logger import default_logger
 
@@ -259,12 +261,16 @@ def export_table(db, table, directory, options, error_queue, progress_info, sind
     with sindex_counter.get_lock():
         sindex_counter.value += len(table_info["indexes"])
     # -- start the writer
-    ctx = mp.get_context(mp.get_start_method())
-    task_queue = SimpleQueue(ctx=ctx)
+    if six.PY3:
+        ctx = multiprocessing.get_context(multiprocessing.get_start_method())
+        task_queue = SimpleQueue(ctx=ctx)
+    else:
+        task_queue = SimpleQueue()
+
     writer = None
     if options.format == "json":
         filename = directory + "/%s/%s.json" % (db, table)
-        writer = mp.Process(
+        writer = multiprocessing.Process(
             target=json_writer,
             args=(
                 filename,
@@ -274,7 +280,7 @@ def export_table(db, table, directory, options, error_queue, progress_info, sind
                 options.format))
     elif options.format == "csv":
         filename = directory + "/%s/%s.csv" % (db, table)
-        writer = mp.Process(
+        writer = multiprocessing.Process(
             target=csv_writer,
             args=(
                 filename,
@@ -284,7 +290,7 @@ def export_table(db, table, directory, options, error_queue, progress_info, sind
                 error_queue))
     elif options.format == "ndjson":
         filename = directory + "/%s/%s.ndjson" % (db, table)
-        writer = mp.Process(
+        writer = multiprocessing.Process(
             target=json_writer,
             args=(
                 filename,
@@ -389,13 +395,16 @@ def update_progress(progress_info, options):
 
 def run_clients(options, workingDir, db_table_set):
     # Spawn one client for each db.table, up to options.clients at a time
-    exit_event = mp.Event()
+    exit_event = multiprocessing.Event()
     processes = []
-    ctx = mp.get_context(mp.get_start_method())
-    error_queue = SimpleQueue(ctx=ctx)
-    interrupt_event = mp.Event()
-    sindex_counter = mp.Value(ctypes.c_longlong, 0)
-    hook_counter = mp.Value(ctypes.c_longlong, 0)
+    if six.PY3:
+        ctx = multiprocessing.get_context(multiprocessing.get_start_method())
+        error_queue = SimpleQueue(ctx=ctx)
+    else:
+        error_queue = SimpleQueue()
+    interrupt_event = multiprocessing.Event()
+    sindex_counter = multiprocessing.Value(ctypes.c_longlong, 0)
+    hook_counter = multiprocessing.Value(ctypes.c_longlong, 0)
 
     signal.signal(signal.SIGINT, lambda a, b: abort_export(a, b, exit_event, interrupt_event))
     errors = []
@@ -407,8 +416,8 @@ def run_clients(options, workingDir, db_table_set):
 
         tableSize = int(options.retryQuery("count", query.db(db).table(table).info()['doc_count_estimates'].sum()))
 
-        progress_info.append((mp.Value(ctypes.c_longlong, 0),
-                              mp.Value(ctypes.c_longlong, tableSize)))
+        progress_info.append((multiprocessing.Value(ctypes.c_longlong, 0),
+                              multiprocessing.Value(ctypes.c_longlong, tableSize)))
         arg_lists.append((db, table,
                           workingDir,
                           options,
@@ -430,9 +439,9 @@ def run_clients(options, workingDir, db_table_set):
             processes = [process for process in processes if process.is_alive()]
 
             if len(processes) < options.clients and len(arg_lists) > 0:
-                newProcess = mp.Process(target=export_table, args=arg_lists.pop(0))
-                newProcess.start()
-                processes.append(newProcess)
+                new_process = multiprocessing.Process(target=export_table, args=arg_lists.pop(0))
+                new_process.start()
+                processes.append(new_process)
 
             update_progress(progress_info, options)
 
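The hunk above sits inside a hand-rolled bounded pool: the loop prunes finished workers, then starts new export processes until options.clients are running. A self-contained sketch of the same pattern (the worker body and sleep intervals are placeholders, not the PR's values):

import multiprocessing
import time

def worker(item):
    time.sleep(0.2)  # stand-in for export_table

def run_pool(work_items, max_clients=4):
    processes = []
    while work_items or processes:
        # drop workers that have finished
        processes = [p for p in processes if p.is_alive()]
        # top the pool back up, one process per pending work item
        while work_items and len(processes) < max_clients:
            process = multiprocessing.Process(target=worker, args=(work_items.pop(0),))
            process.start()
            processes.append(process)
        time.sleep(0.1)

if __name__ == "__main__":
    run_pool(list(range(10)))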
39 changes: 23 additions & 16 deletions rethinkdb/_import.py
@@ -26,13 +26,14 @@
 import csv
 import ctypes
 import json
-import multiprocessing as mp
+import multiprocessing
 import optparse
 import os
 import signal
 import sys
 import time
 import traceback
+import six
 from multiprocessing.queues import Queue, SimpleQueue
 
 from rethinkdb import ast, errors, query, utils_common
@@ -110,12 +111,12 @@ def __init__(
         self.query_runner = query_runner
 
         # reporting information
-        self._bytes_size = mp.Value(ctypes.c_longlong, -1)
-        self._bytes_read = mp.Value(ctypes.c_longlong, -1)
+        self._bytes_size = multiprocessing.Value(ctypes.c_longlong, -1)
+        self._bytes_read = multiprocessing.Value(ctypes.c_longlong, -1)
 
-        self._total_rows = mp.Value(ctypes.c_longlong, -1)
-        self._rows_read = mp.Value(ctypes.c_longlong, 0)
-        self._rows_written = mp.Value(ctypes.c_longlong, 0)
+        self._total_rows = multiprocessing.Value(ctypes.c_longlong, -1)
+        self._rows_read = multiprocessing.Value(ctypes.c_longlong, 0)
+        self._rows_written = multiprocessing.Value(ctypes.c_longlong, 0)
 
         # source
         if hasattr(source, 'read'):
@@ -1083,15 +1084,21 @@ def import_tables(options, sources, files_ignored=None):
 
     tables = dict(((x.db, x.table), x) for x in sources)  # (db, table) => table
 
-    ctx = mp.get_context(mp.get_start_method())
+    if six.PY3:
+        ctx = multiprocessing.get_context(multiprocessing.get_start_method())
+        error_queue = SimpleQueue(ctx=ctx)
+        warning_queue = SimpleQueue(ctx=ctx)
+        timing_queue = SimpleQueue(ctx=ctx)
+    else:
+        error_queue = SimpleQueue()
+        warning_queue = SimpleQueue()
+        timing_queue = SimpleQueue()
+
     max_queue_size = options.clients * 3
-    work_queue = mp.Manager().Queue(max_queue_size)
-    error_queue = SimpleQueue(ctx=ctx)
-    warning_queue = SimpleQueue(ctx=ctx)
-    exit_event = mp.Event()
-    interrupt_event = mp.Event()
+    work_queue = multiprocessing.Manager().Queue(max_queue_size)
 
-    timing_queue = SimpleQueue(ctx=ctx)
+    exit_event = multiprocessing.Event()
+    interrupt_event = multiprocessing.Event()
 
     errors = []
     warnings = []
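A note on the queue choices above (my reading of the code, not stated in the PR): SimpleQueue is minimal and unbounded, which suits the error/warning/timing channels, while the work queue comes from a Manager and is created with a maxsize of options.clients * 3, so readers that outpace the writers block instead of buffering the whole import in memory. A small sketch of that backpressure, with a made-up size:

import multiprocessing

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    work_queue = manager.Queue(3)  # bounded: put() blocks once 3 items wait

    for item in range(3):
        work_queue.put(item)

    print(work_queue.full())  # True; a fourth put() would block until a get()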
@@ -1168,7 +1175,7 @@ def drain_queues():
     try:
         # - start the progress bar
        if not options.quiet:
-            progress_bar = mp.Process(
+            progress_bar = multiprocessing.Process(
                 target=update_progress,
                 name="progress bar",
                 args=(sources, options.debug, exit_event, progress_bar_sleep)
@@ -1180,7 +1187,7 @@ def drain_queues():
         writers = []
         pools.append(writers)
         for i in range(options.clients):
-            writer = mp.Process(
+            writer = multiprocessing.Process(
                 target=table_writer,
                 name="table writer %d" %
                 i,
@@ -1204,7 +1211,7 @@ def drain_queues():
             # add a workers to fill up the readers pool
             while len(readers) < options.clients:
                 table = next(file_iter)
-                reader = mp.Process(
+                reader = multiprocessing.Process(
                     target=table.read_to_queue,
                     name="table reader %s.%s" %
                     (table.db,
8 changes: 4 additions & 4 deletions rethinkdb/utils_common.py
@@ -129,7 +129,7 @@ def check_minimum_version(options, minimum_version='1.6'):
     version_string = options.retryQuery('get server version', query.db(
         'rethinkdb').table('server_status')[0]['process']['version'])
 
-    matches = re.match(r'rethinkdb (?P<version>(\d+)\.(\d+)\.(\d+)).*', version_string)
+    matches = re.match(r'(rethinkdb|rebirthdb) (?P<version>(\d+)\.(\d+)\.(\d+)).*', version_string)
 
     if not matches:
         raise RuntimeError("invalid version string format: %s" % version_string)
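A quick check of the widened pattern, which now also accepts the RebirthDB fork's server name (the sample version strings below are illustrative, not from the PR):

import re

PATTERN = r'(rethinkdb|rebirthdb) (?P<version>(\d+)\.(\d+)\.(\d+)).*'

for version_string in ('rethinkdb 2.4.0 (GCC 5.4.0)', 'rebirthdb 2.4.0'):
    matches = re.match(PATTERN, version_string)
    print(matches.group('version'))  # prints 2.4.0 for both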
@@ -285,11 +285,11 @@ def take_action(self, action, dest, opt, value, values, parser):
         '--connect',
         dest='driver_port',
         metavar='HOST:PORT',
-        help='host and client port of a rethinkdb node to connect (default: localhost:%d)' %
-        net.DEFAULT_PORT,
+        help='host and client port of a rethinkdb node to connect (default: localhost:%d)' % net.DEFAULT_PORT,
         action='callback',
         callback=combined_connect_action,
-        type='string')
+        type='str'
+        )
     connection_group.add_option(
         '--driver-port',
         dest='driver_port',
9 changes: 5 additions & 4 deletions setup.py
@@ -44,10 +44,11 @@
     if MATCH.group("post"):
         VERSION += "." + MATCH.group("post")
 
-    with open("rethinkdb/version.py", "w") as ostream:
-        print("# Autogenerated version", file=ostream)
-        print(file=ostream)
-        print("VERSION", "=", repr(VERSION), file=ostream)
+    with open("rethinkdb/version.py", "w") as f:
+        f.writelines([
+            "# Autogenerated version",
+            "VERSION = {0}".format(VERSION)
+        ])
 else:
     raise RuntimeError("{!r} does not match version format {!r}".format(
         RETHINKDB_VERSION_DESCRIBE, VERSION_RE))

Review comment on the added line "VERSION = {0}".format(VERSION): This needs to use repr.
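The review point, expanded (my illustration, not part of the PR): without repr the generated file contains an unquoted value, so the resulting version.py is not valid Python. Note also that writelines does not append newline characters, so explicit "\n" terminators would be needed to keep the file line-structured. A quick sketch:

VERSION = "2.4.0"

# What the new code writes -- the value comes out unquoted, so the
# generated version.py raises a SyntaxError on import:
print("VERSION = {0}".format(VERSION))    # VERSION = 2.4.0

# What the reviewer asks for -- repr() adds the quotes:
print("VERSION = {0!r}".format(VERSION))  # VERSION = '2.4.0'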