actorUser
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 4 additions & 4 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎README_cn.md‎
Lines changed: 1 addition & 1 deletion b/‎README_cn.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README_en.md‎
Lines changed: 1 addition & 1 deletion b/‎README_en.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎applications/doc_vqa/Extraction/docvqa.py‎
Lines changed: 25 additions & 20 deletions b/‎applications/doc_vqa/Extraction/docvqa.py‎
Lines changed: 25 additions & 20 deletions
diff --git a/‎applications/doc_vqa/Extraction/model.py‎
Lines changed: 49 additions & 37 deletions b/‎applications/doc_vqa/Extraction/model.py‎
Lines changed: 49 additions & 37 deletions
@@ -1,11 +1,11 @@
 repos:
--   repo: https://github.com/PaddlePaddle/mirrors-yapf.git
-    sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
+-   repo: https://github.com/google/yapf
+    rev: v0.32.0
     hooks:
     -   id: yapf
         files: \.py$
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    sha: a11d9314b22d8f8c7556443875b731ef05965464
+    rev: v4.1.0
     hooks:
     -   id: check-merge-conflict
     -   id: check-symlinks
@@ -16,7 +16,7 @@ repos:
     -   id: trailing-whitespace
         files: \.md$
 -   repo: https://github.com/Lucas-C/pre-commit-hooks
-    sha: v1.0.1
+    rev: v1.1.14
     hooks:
     -   id: forbid-crlf
         files: \.md$
 
@@ -294,7 +294,7 @@ PaddleNLP提供[一键预测功能](./docs/model_zoo/taskflow.md)，无需训练
 PaddleNLP提供全流程的文本领域API，可大幅提升NLP任务建模的效率：
 
 - 支持[千言](https://www.luge.ai)等丰富中文数据集加载的[Dataset API](https://paddlenlp.readthedocs.io/zh/latest/data_prepare/dataset_list.html)。
-- 提供🤗Hugging Face Style的API，支持 **500+** 优质预训练模型加载的[Transformers API](https://paddlenlp.readthedocs.io/zh/latest/model_zoo/index.html)。  
+- 提供🤗Hugging Face Style的API，支持 **500+** 优质预训练模型加载的[Transformers API](https://paddlenlp.readthedocs.io/zh/latest/model_zoo/index.html)。
 - 提供30+多语言词向量的[Embedding API](https://paddlenlp.readthedocs.io/zh/latest/model_zoo/embeddings.html)
 
 更多使用方法请参考[API文档](https://paddlenlp.readthedocs.io/zh/latest/)。
 
@@ -57,7 +57,7 @@ For more usage please refer to [Taskflow Docs](./docs/model_zoo/taskflow.md).
 
 ### Awesome Chinese Model Zoo
 
-#### 🀄 Comprehensive Chinese Transformer Models  
+#### 🀄 Comprehensive Chinese Transformer Models
 
 We provide **45+** network architectures and over **500+** pretrained models. It not only includes all the SOTA models like ERNIE, PLATO and SKEP released by Baidu, but also integrates most of the high-quality Chinese pretrained models developed by other organizations. Use the `AutoModel` API to **⚡SUPER FAST⚡** download pretrained models of different architectures. We welcome all developers to contribute their Transformer models to PaddleNLP!
 
 
@@ -14,6 +14,7 @@
 
 
 class DocVQAExample(object):
+
     def __init__(self,
                  question,
                  doc_tokens,
@@ -48,6 +49,7 @@ def __init__(self,
 
 
 class DocVQA(Dataset):
+
     def __init__(self,
                  args,
                  tokenizer,
@@ -264,7 +266,8 @@ def convert_examples_to_features(self, examples, tokenizer, label_map,
                 input_mask=spans_input_mask,
                 segment_ids=spans_segment_ids,
                 boxes=spans_boxes_tokens,
-                label=label_ids, )
+                label=label_ids,
+            )
             features.append(feature)
         return features
 
@@ -292,8 +295,9 @@ def create_examples(self, data, is_test=False):
                 scale_y = 1000 / max(width, height)
 
             scaled_doc_boxes = [[
-                round((b[0] - x_min) * scale_x), round(
-                    (b[2] - y_min) * scale_y), round((b[1] - x_min) * scale_x),
+                round((b[0] - x_min) * scale_x),
+                round((b[2] - y_min) * scale_y),
+                round((b[1] - x_min) * scale_x),
                 round((b[3] - y_min) * scale_y)
             ] for b in doc_boxes]
 
@@ -308,11 +312,10 @@ def create_examples(self, data, is_test=False):
                     if pos > 1000:
                         print(width, height, box, oribox)
 
-            example = DocVQAExample(
-                question=question,
-                doc_tokens=doc_tokens,
-                doc_boxes=scaled_doc_boxes,
-                labels=labels)
+            example = DocVQAExample(question=question,
+                                    doc_tokens=doc_tokens,
+                                    doc_boxes=scaled_doc_boxes,
+                                    labels=labels)
             examples.append(example)
         return examples
 
@@ -338,19 +341,21 @@ def docvqa_input(self):
             max_span_num=self.max_span_num,
             max_query_length=self.max_query_length)
 
-        all_input_ids = paddle.to_tensor(
-            [f.input_ids for f in features], dtype="int64")
-        all_input_mask = paddle.to_tensor(
-            [f.input_mask for f in features], dtype="int64")
-        all_segment_ids = paddle.to_tensor(
-            [f.segment_ids for f in features], dtype="int64")
-        all_bboxes = paddle.to_tensor(
-            [f.boxes for f in features], dtype="int64")
-        all_labels = paddle.to_tensor(
-            [f.label for f in features], dtype="int64")
+        all_input_ids = paddle.to_tensor([f.input_ids for f in features],
+                                         dtype="int64")
+        all_input_mask = paddle.to_tensor([f.input_mask for f in features],
+                                          dtype="int64")
+        all_segment_ids = paddle.to_tensor([f.segment_ids for f in features],
+                                           dtype="int64")
+        all_bboxes = paddle.to_tensor([f.boxes for f in features],
+                                      dtype="int64")
+        all_labels = paddle.to_tensor([f.label for f in features],
+                                      dtype="int64")
         self.sample_list = [
-            np.array(all_input_ids), np.array(all_input_mask),
-            np.array(all_segment_ids), np.array(all_bboxes),
+            np.array(all_input_ids),
+            np.array(all_input_mask),
+            np.array(all_segment_ids),
+            np.array(all_bboxes),
             np.array(all_labels)
         ]
 
 
@@ -7,6 +7,7 @@
 
 
 class Crf_decoding(paddle.fluid.dygraph.Layer):
+
     def __init__(self, param_attr, size=None, is_test=True, dtype='float32'):
         super(Crf_decoding, self).__init__()
 
@@ -38,16 +39,20 @@ def forward(self, input, label=None, length=None):
         }
         if length is not None:
             this_inputs['Length'] = [length]
-        self._helper.append_op(
-            type='crf_decoding',
-            inputs=this_inputs,
-            outputs={"ViterbiPath": [viterbi_path]},
-            attrs={"is_test": self._is_test, })
+        self._helper.append_op(type='crf_decoding',
+                               inputs=this_inputs,
+                               outputs={"ViterbiPath": [viterbi_path]},
+                               attrs={
+                                   "is_test": self._is_test,
+                               })
         return viterbi_path
 
 
 class Chunk_eval(paddle.fluid.dygraph.Layer):
-    def __init__(self, num_chunk_types, chunk_scheme,
+
+    def __init__(self,
+                 num_chunk_types,
+                 chunk_scheme,
                  excluded_chunk_types=None):
         super(Chunk_eval, self).__init__()
         self.num_chunk_types = num_chunk_types
@@ -73,27 +78,30 @@ def forward(self, input, label, seq_length=None):
         if seq_length is not None:
             this_input["SeqLength"] = [seq_length]
 
-        self._helper.append_op(
-            type='chunk_eval',
-            inputs=this_input,
-            outputs={
-                "Precision": [precision],
-                "Recall": [recall],
-                "F1-Score": [f1_score],
-                "NumInferChunks": [num_infer_chunks],
-                "NumLabelChunks": [num_label_chunks],
-                "NumCorrectChunks": [num_correct_chunks]
-            },
-            attrs={
-                "num_chunk_types": self.num_chunk_types,
-                "chunk_scheme": self.chunk_scheme,
-                "excluded_chunk_types": self.excluded_chunk_types or []
-            })
+        self._helper.append_op(type='chunk_eval',
+                               inputs=this_input,
+                               outputs={
+                                   "Precision": [precision],
+                                   "Recall": [recall],
+                                   "F1-Score": [f1_score],
+                                   "NumInferChunks": [num_infer_chunks],
+                                   "NumLabelChunks": [num_label_chunks],
+                                   "NumCorrectChunks": [num_correct_chunks]
+                               },
+                               attrs={
+                                   "num_chunk_types":
+                                   self.num_chunk_types,
+                                   "chunk_scheme":
+                                   self.chunk_scheme,
+                                   "excluded_chunk_types":
+                                   self.excluded_chunk_types or []
+                               })
         return (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
                 num_correct_chunks)
 
 
 class Linear_chain_crf(paddle.fluid.dygraph.Layer):
+
     def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
         super(Linear_chain_crf, self).__init__()
 
@@ -131,29 +139,31 @@ def forward(self, input, label, length=None):
         }
         if length is not None:
             this_inputs['Length'] = [length]
-        self._helper.append_op(
-            type='linear_chain_crf',
-            inputs=this_inputs,
-            outputs={
-                "Alpha": [alpha],
-                "EmissionExps": [emission_exps],
-                "TransitionExps": transition_exps,
-                "LogLikelihood": log_likelihood
-            },
-            attrs={"is_test": self._is_test, })
+        self._helper.append_op(type='linear_chain_crf',
+                               inputs=this_inputs,
+                               outputs={
+                                   "Alpha": [alpha],
+                                   "EmissionExps": [emission_exps],
+                                   "TransitionExps": transition_exps,
+                                   "LogLikelihood": log_likelihood
+                               },
+                               attrs={
+                                   "is_test": self._is_test,
+                               })
         return log_likelihood
 
 
 class LayoutXLMForTokenClassification_with_CRF(LayoutXLMPretrainedModel):
+
     def __init__(self, layoutxlm, num_classes, dropout=None):
         super(LayoutXLMForTokenClassification_with_CRF, self).__init__()
         self.num_classes = num_classes
         if isinstance(layoutxlm, dict):
             self.layoutxlm = LayoutXLMModel(**layoutxlm)
         else:
             self.layoutxlm = layoutxlm
-        self.dropout = nn.Dropout(dropout if dropout is not None else
-                                  self.layoutxlm.config["hidden_dropout_prob"])
+        self.dropout = nn.Dropout(dropout if dropout is not None else self.
+                                  layoutxlm.config["hidden_dropout_prob"])
         self.emission_classifier = nn.Linear(
             self.layoutxlm.config["hidden_size"], self.num_classes)
         self.emission_classifier.apply(self.init_weights)
@@ -198,7 +208,8 @@ def forward(self,
             attention_mask=attention_mask,
             token_type_ids=token_type_ids,
             position_ids=position_ids,
-            head_mask=head_mask, )
+            head_mask=head_mask,
+        )
         seq_length = input_ids.shape[1]
         # sequence out and image out
         sequence_logits, image_output = outputs[0][:, :seq_length], outputs[
@@ -208,8 +219,9 @@ def forward(self,
         labels = labels.reshape([-1, seq_length, 1])
 
         # standard crf loss
-        crf_cost = self.linear_chain_crf(
-            input=emission, label=labels, length=length)
+        crf_cost = self.linear_chain_crf(input=emission,
+                                         label=labels,
+                                         length=length)
         crf_decode = self.crf_decoding(input=emission, length=length)
         if is_train:
             return [crf_cost]