Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 9d6a0b0

Browse files
committed
ENH: update joblib
2012-05-07 Vlad Niculae ENH: controlled randomness in tests and doctest fix. 2012-02-21 GaelVaroquaux ENH: add verbosity in memory. 2012-02-21 GaelVaroquaux BUG: non-reproducible hashing: order of kwargs. The ordering of a dictionary is random. As a result, the function hashing was not reproducible.
1 parent 2b287c3 commit 9d6a0b0

File tree

6 files changed

+60
-12
lines changed

6 files changed

+60
-12
lines changed

sklearn/externals/joblib/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
inputs and outputs: Python functions. Joblib can save their
6161
computation to disk and rerun it only if necessary::
6262
63+
>>> import numpy as np
6364
>>> from sklearn.externals.joblib import Memory
6465
>>> mem = Memory(cachedir='/tmp/joblib')
6566
>>> import numpy as np
@@ -101,7 +102,7 @@
101102
102103
"""
103104

104-
__version__ = '0.6.3'
105+
__version__ = '0.6.4'
105106

106107

107108
from .memory import Memory

sklearn/externals/joblib/func_inspect.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
207207
)
208208

209209
varkwargs = dict()
210-
for arg_name, arg_value in kwargs.iteritems():
210+
for arg_name, arg_value in sorted(kwargs.items()):
211211
if arg_name in arg_dict:
212212
arg_dict[arg_name] = arg_value
213213
elif arg_keywords is not None:

sklearn/externals/joblib/memory.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,15 @@ def __init__(self, func, cachedir, ignore=None, mmap_mode=None,
159159
def __call__(self, *args, **kwargs):
160160
# Compare the function code with the previous to see if the
161161
# function code has changed
162-
output_dir, _ = self.get_output_dir(*args, **kwargs)
162+
output_dir, argument_hash = self.get_output_dir(*args, **kwargs)
163163
# FIXME: The statements below should be try/excepted
164164
if not (self._check_previous_func_code(stacklevel=3) and
165165
os.path.exists(output_dir)):
166+
if self._verbose > 10:
167+
_, name = get_func_name(self.func)
168+
self.warn('Computing func %s, argument hash %s in '
169+
'directory %s'
170+
% (name, argument_hash, output_dir))
166171
return self.call(*args, **kwargs)
167172
else:
168173
try:
@@ -287,6 +292,10 @@ def _check_previous_func_code(self, stacklevel=2):
287292

288293
# The function has changed, wipe the cache directory.
289294
# XXX: Should be using warnings, and giving stacklevel
295+
if self._verbose > 10:
296+
_, func_name = get_func_name(self.func, resolv_alias=False)
297+
self.warn("Function %s (stored in %s) has changed." %
298+
(func_name, func_dir))
290299
self.clear(warn=True)
291300
return False
292301

@@ -308,12 +317,11 @@ def call(self, *args, **kwargs):
308317
persist the output values.
309318
"""
310319
start_time = time.time()
320+
output_dir, argument_hash = self.get_output_dir(*args, **kwargs)
311321
if self._verbose:
312322
print self.format_call(*args, **kwargs)
313-
output_dir, argument_hash = self.get_output_dir(*args, **kwargs)
314323
output = self.func(*args, **kwargs)
315324
self._persist_output(output, output_dir)
316-
input_repr = self._persist_input(output_dir, *args, **kwargs)
317325
duration = time.time() - start_time
318326
if self._verbose:
319327
_, name = get_func_name(self.func)
@@ -368,6 +376,8 @@ def _persist_output(self, output, dir):
368376
mkdirp(dir)
369377
filename = os.path.join(dir, 'output.pkl')
370378
numpy_pickle.dump(output, filename, compress=self.compress)
379+
if self._verbose > 10:
380+
print 'Persisting in %s' % dir
371381
except OSError:
372382
" Race condition in the creation of the directory "
373383

@@ -398,10 +408,17 @@ def load_output(self, output_dir):
398408
"""
399409
if self._verbose > 1:
400410
t = time.time() - self.timestamp
401-
print '[Memory]% 16s: Loading %s...' % (
411+
if self._verbose < 10:
412+
print '[Memory]% 16s: Loading %s...' % (
402413
format_time(t),
403414
self.format_signature(self.func)[0]
404415
)
416+
else:
417+
print '[Memory]% 16s: Loading %s from %s' % (
418+
format_time(t),
419+
self.format_signature(self.func)[0],
420+
output_dir
421+
)
405422
filename = os.path.join(output_dir, 'output.pkl')
406423
return numpy_pickle.load(filename,
407424
mmap_mode=self.mmap_mode)

sklearn/externals/joblib/test/test_hashing.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ def test_hash_methods():
9393
def test_hash_numpy():
9494
""" Test hashing with numpy arrays.
9595
"""
96-
arr1 = np.random.random((10, 10))
96+
rnd = np.random.RandomState(0)
97+
arr1 = rnd.random_sample((10, 10))
9798
arr2 = arr1.copy()
9899
arr3 = arr2.copy()
99100
arr3[0] += 1
@@ -160,7 +161,8 @@ def test_hash_numpy_performance():
160161
In [26]: %timeit hash(a)
161162
100 loops, best of 3: 20.8 ms per loop
162163
"""
163-
a = np.random.random(1000000)
164+
rnd = np.random.RandomState(0)
165+
a = rnd.random_sample(1000000)
164166
md5_hash = lambda x: hashlib.md5(np.getbuffer(x)).hexdigest()
165167

166168
relative_diff = relative_time(md5_hash, hash, a)

sklearn/externals/joblib/test/test_memory.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,10 @@ def n(l=None):
325325
verbose=0)
326326
memory.clear(warn=False)
327327
cached_n = memory.cache(n)
328+
329+
rnd = np.random.RandomState(0)
328330
for i in range(3):
329-
a = np.random.random((10, 10))
331+
a = rnd.random_sample((10, 10))
330332
for _ in range(3):
331333
yield nose.tools.assert_true, np.all(cached_n(a) == a)
332334
yield nose.tools.assert_equal, len(accumulator), i + 1

sklearn/externals/joblib/test/test_numpy_pickle.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,8 @@ def test_value_error():
134134
@with_numpy
135135
def test_numpy_persistence():
136136
filename = env['filename']
137-
a = np.random.random((10, 2))
137+
rnd = np.random.RandomState(0)
138+
a = rnd.random_sample((10, 2))
138139
for compress, cache_size in ((0, 0), (1, 0), (1, 10)):
139140
# We use 'a.T' to have a non C-contiguous array.
140141
for index, obj in enumerate(((a,), (a.T,), (a, a), [a, a, a])):
@@ -183,7 +184,8 @@ def test_numpy_persistence():
183184

184185
@with_numpy
185186
def test_memmap_persistence():
186-
a = np.random.random(10)
187+
rnd = np.random.RandomState(0)
188+
a = rnd.random_sample(10)
187189
filename = env['filename'] + str(random.randint(0, 1000))
188190
numpy_pickle.dump(a, filename)
189191
b = numpy_pickle.load(filename, mmap_mode='r')
@@ -195,7 +197,8 @@ def test_memmap_persistence():
195197
def test_masked_array_persistence():
196198
# The special-case picker fails, because saving masked_array
197199
# not implemented, but it just delegates to the standard pickler.
198-
a = np.random.random(10)
200+
rnd = np.random.RandomState(0)
201+
a = rnd.random_sample(10)
199202
a = np.ma.masked_greater(a, 0.5)
200203
filename = env['filename'] + str(random.randint(0, 1000))
201204
numpy_pickle.dump(a, filename)
@@ -210,3 +213,26 @@ def test_z_file():
210213
numpy_pickle.write_zfile(file(filename, 'wb'), data)
211214
data_read = numpy_pickle.read_zfile(file(filename, 'rb'))
212215
nose.tools.assert_equal(data, data_read)
216+
217+
################################################################################
218+
# Test dumping array subclasses
219+
if np is not None:
220+
221+
class SubArray(np.ndarray):
222+
223+
def __reduce__(self):
224+
return (_load_sub_array, (np.asarray(self), ))
225+
226+
227+
def _load_sub_array(arr):
228+
d = SubArray(arr.shape)
229+
d[:] = arr
230+
return d
231+
232+
@with_numpy
233+
def test_numpy_subclass():
234+
filename = env['filename']
235+
a = SubArray((10,))
236+
numpy_pickle.dump(a, filename)
237+
c = numpy_pickle.load(filename)
238+
nose.tools.assert_true(isinstance(c, SubArray))

0 commit comments

Comments
 (0)