Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 71af72b

Browse files
zhoukunsheng, luotao1
authored and committed
upgrade hash op to support Tensor and LoDTensor input (PaddlePaddle#17998)
1 parent d3b3443 commit 71af72b

5 files changed

Lines changed: 48 additions & 58 deletions

File tree

paddle/fluid/API.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], va
238238
paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65c8362e48810b8226e311c5d046db51'))
239239
paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', '9f303c67538e468a36c5904a0a3aa110'))
240240
paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '18ec2e3afeb90e70c8b73d2b71c40fdb'))
241-
paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'da621ba1363e8f5fe7b702526bbae18f'))
241+
paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'a0b73c21be618cec0281e7903039e5e3'))
242242
paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5d16663e096d7f04954c70ce1cc5e195'))
243243
paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', 'e3993a477c94729526040ff65d95728e'))
244244
paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e399f9436fed5f7ff480d8532e42c937'))

paddle/fluid/operators/hash_op.cc

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
1+
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
22
33
Licensed under the Apache License, Version 2.0 (the "License");
44
you may not use this file except in compliance with the License.
@@ -46,11 +46,10 @@ class HashOp : public framework::OperatorWithKernel {
4646
class HashOpMaker : public framework::OpProtoAndCheckerMaker {
4747
public:
4848
void Make() override {
49-
AddInput("X", "(Tensor) Input tensor of scale operator.");
50-
AddOutput("Out", "(Tensor) Output tensor of scale operator.");
49+
AddInput("X", "(Tensor) Input tensor of hash operator.");
50+
AddOutput("Out", "(Tensor) Output tensor of hash operator.");
5151
AddComment(R"DOC(
52-
**Hash Operator**
53-
$$Out = scale * X$$
52+
Execute `num_hash` times xxHash algorithm on all elements on second dimension of input.
5453
)DOC");
5554
AddAttr<int>("num_hash", "").SetDefault(1);
5655
AddAttr<int>("mod_by", "").SetDefault(100000);

paddle/fluid/operators/hash_op.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
1+
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
22
33
Licensed under the Apache License, Version 2.0 (the "License");
44
you may not use this file except in compliance with the License.
@@ -47,10 +47,6 @@ class HashKernel : public framework::OpKernel<T> {
4747
int num_hash = context.Attr<int>("num_hash");
4848

4949
auto in_dims = in_t->dims();
50-
auto in_lod = in_t->lod();
51-
PADDLE_ENFORCE_EQ(
52-
static_cast<uint64_t>(in_dims[0]), in_lod[0].back(),
53-
"The actual input data's size mismatched with LoD information.");
5450

5551
std::vector<int64_t> out_dims;
5652
HashOutputSize(in_dims, out_dims, num_hash);
@@ -67,6 +63,7 @@ class HashKernel : public framework::OpKernel<T> {
6763
}
6864
input += last_dim;
6965
}
66+
7067
out_t->set_lod(in_t->lod());
7168
}
7269
};

python/paddle/fluid/layers/nn.py

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10810,12 +10810,9 @@ def hash(input, hash_size, num_hash=1, name=None):
1081010810
Given:
1081110811
1081210812
# shape [2, 2]
10813-
input.data = [
10813+
input.data =
1081410814
[[1, 2],
10815-
[3, 4]],
10816-
]
10817-
10818-
input.lod = [[0, 2]]
10815+
[3, 4]]
1081910816
1082010817
hash_size = 10000
1082110818
@@ -10833,40 +10830,32 @@ def hash(input, hash_size, num_hash=1, name=None):
1083310830
[8310, 1327, 1654, 4567]],
1083410831
]
1083510832
10836-
output.lod = [[0, 2]]
10837-
1083810833
Args:
1083910834
input (Variable): The input variable which is a one-hot word. The
10840-
dimensions of the input variable must be 2.
10835+
dimensions of the input variable must be 2. Both Tensor and LoDTensor are supported.
1084110836
hash_size (int): The space size for hash algorithm. The output value
1084210837
will keep in the range :math:`[0, hash_size - 1]`.
1084310838
num_hash (int): The times of hash, default 1.
1084410839
name (str, default None): The name of this layer.
1084510840
1084610841
Returns:
10847-
Variable: The hash result variable which is a LoDTensor.
10842+
Variable: The hash result variable, which is the same variable type as `input`.
1084810843
1084910844
Examples:
1085010845
.. code-block:: python
1085110846
1085210847
import paddle.fluid as fluid
10853-
import paddle.fluid.layers as layers
10854-
import numpy as np
10855-
10856-
titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=1)
10857-
hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=1, hash_size=1000)
1085810848
10859-
place = fluid.core.CPUPlace()
10860-
exece = fluid.Executor(place)
10861-
exece.run(fluid.default_startup_program())
10849+
# titles has shape [batch, 1]
10850+
titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=0)
10851+
# hash_r has shape [batch, 2]
10852+
hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=2, hash_size=1000)
1086210853
10863-
# Init Tensor
10864-
tensor = fluid.core.LoDTensor()
10865-
tensor.set(np.random.randint(0, 10, (3, 1)).astype("int32"), place)
10866-
# Set LoD
10867-
tensor.set_recursive_sequence_lengths([[1, 1, 1]])
1086810854
10869-
out = exece.run(feed={'titles': tensor}, fetch_list=[hash_r], return_numpy=False)
10855+
# titles has shape [batch, 1] and lod information
10856+
titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=1)
10857+
# hash_r has shape [batch, 2] and inherits lod information from titles
10858+
hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=2, hash_size=1000)
1087010859
"""
1087110860
helper = LayerHelper('hash', **locals())
1087210861
out = helper.create_variable_for_type_inference(

python/paddle/fluid/tests/unittests/test_hash_op.py

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -17,36 +17,41 @@
1717
from op_test import OpTest
1818

1919

20-
class TestScaleOp(OpTest):
20+
class TestHashOp(OpTest):
2121
def setUp(self):
2222
self.op_type = "hash"
2323
self.init_test_case()
2424
self.inputs = {'X': (self.in_seq, self.lod)}
25-
self.attrs = {'num_hash': 4, 'mod_by': 10000}
25+
self.attrs = {'num_hash': 2, 'mod_by': 10000}
2626
self.outputs = {'Out': (self.out_seq, self.lod)}
2727

2828
def init_test_case(self):
29-
np.random.seed = 1
30-
self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
31-
self.lod = [[9, 4, 11, 6]]
32-
# self.out_seq = np.ones([30, 4, 1], dtype=np.int32)
33-
self.out_seq = [
34-
[[9662], [9217], [1129], [8487]], [[9662], [9217], [1129], [8487]],
35-
[[8310], [1327], [1654], [4567]], [[6897], [3218], [2013], [1241]],
36-
[[9407], [6715], [6949], [8094]], [[8473], [694], [5142], [2479]],
37-
[[8310], [1327], [1654], [4567]], [[6897], [3218], [2013], [1241]],
38-
[[4372], [9456], [8204], [6695]], [[6897], [3218], [2013], [1241]],
39-
[[8473], [694], [5142], [2479]], [[4372], [9456], [8204], [6695]],
40-
[[4372], [9456], [8204], [6695]], [[8473], [694], [5142], [2479]],
41-
[[9407], [6715], [6949], [8094]], [[9369], [4525], [8935], [9210]],
42-
[[4372], [9456], [8204], [6695]], [[4372], [9456], [8204], [6695]],
43-
[[9369], [4525], [8935], [9210]], [[6897], [3218], [2013], [1241]],
44-
[[9038], [7951], [5953], [8657]], [[9407], [6715], [6949], [8094]],
45-
[[9662], [9217], [1129], [8487]], [[9369], [4525], [8935], [9210]],
46-
[[9038], [7951], [5953], [8657]], [[9662], [9217], [1129], [8487]],
47-
[[9369], [4525], [8935], [9210]], [[1719], [5986], [9919], [3421]],
48-
[[4372], [9456], [8204], [6695]], [[9038], [7951], [5953], [8657]]
49-
]
29+
np.random.seed(1)
30+
self.in_seq = np.random.randint(0, 10, (8, 1)).astype("int32")
31+
self.lod = [[2, 6]]
32+
self.out_seq = [[[3481], [7475]], [[1719], [5986]], [[8473], [694]],
33+
[[3481], [7475]], [[4372], [9456]], [[4372], [9456]],
34+
[[6897], [3218]], [[9038], [7951]]]
35+
self.out_seq = np.array(self.out_seq)
36+
37+
def test_check_output(self):
38+
self.check_output()
39+
40+
41+
class TestHashNotLoDOp(TestHashOp):
42+
def setUp(self):
43+
self.op_type = "hash"
44+
self.init_test_case()
45+
self.inputs = {'X': self.in_seq}
46+
self.attrs = {'num_hash': 2, 'mod_by': 10000}
47+
self.outputs = {'Out': self.out_seq}
48+
49+
def init_test_case(self):
50+
np.random.seed(1)
51+
self.in_seq = np.random.randint(0, 10, (8, 1)).astype("int32")
52+
self.out_seq = [[[3481], [7475]], [[1719], [5986]], [[8473], [694]],
53+
[[3481], [7475]], [[4372], [9456]], [[4372], [9456]],
54+
[[6897], [3218]], [[9038], [7951]]]
5055
self.out_seq = np.array(self.out_seq)
5156

5257
def test_check_output(self):

0 commit comments

Comments
 (0)