Update README.md (PaddlePaddle#2177)

linjieccc · ZeyuChen · joey12300 · web-flow · commit 27033792a433 · 2022-05-16T23:14:52.000+08:00
* Update README.md

* fix codestyle

* fix codestyle

Co-authored-by: Zeyu Chen <chenzeyu01@baidu.com>
Co-authored-by: zhoushunjie <zhoushunjie@baidu.com>
diff --git a/README.md b/README.md
@@ -24,7 +24,7 @@
   <a href=#安装> 安装 </a> |
   <a href=#QuickStart> 快速开始 </a> |
   <a href=#API文档> API文档 </a> |
-  <a href=#社区交流> 社区交流 </a> 
+  <a href=#社区交流> 社区交流 </a>
 </h4>
 
 ## News  <img src="./docs/imgs/news_icon.png" width="40"/>
@@ -48,11 +48,11 @@ PaddleNLP是飞桨自然语言处理开发库，旨在提升开发者在文本
 
 #### <img src="https://user-images.githubusercontent.com/11793384/168454776-2075cc68-9402-4b0b-8723-5be0a315ddc3.png" width="20" height="20" /><a href=#开箱即用的NLP能力> 开箱即用的NLP能力 </a>
 
-#### <img src="https://user-images.githubusercontent.com/11793384/168454751-f111d8b4-a16a-4e36-b9de-3af8a2f00714.png" width="20" height="20" /><a href=#丰富完备的中文模型库> 丰富完备的中文模型库 </a> 
+#### <img src="https://user-images.githubusercontent.com/11793384/168454751-f111d8b4-a16a-4e36-b9de-3af8a2f00714.png" width="20" height="20" /><a href=#丰富完备的中文模型库> 丰富完备的中文模型库 </a>
 
-#### <img src="https://user-images.githubusercontent.com/11793384/168454721-0ac49e17-22db-4074-ba20-940365daf9f6.png" width="20" height="20" /><a href=#产业级端到端系统范例> 产业级端到端系统范例 </a> 
+#### <img src="https://user-images.githubusercontent.com/11793384/168454721-0ac49e17-22db-4074-ba20-940365daf9f6.png" width="20" height="20" /><a href=#产业级端到端系统范例> 产业级端到端系统范例 </a>
 
-#### <img src="https://user-images.githubusercontent.com/11793384/168454587-8b5a0f63-3d4b-4339-be47-f3ad7ef9e16c.png" width="20" height="20" /><a href=#高性能分布式训练与推理> 高性能分布式训练与推理 </a> 
+#### <img src="https://user-images.githubusercontent.com/11793384/168454587-8b5a0f63-3d4b-4339-be47-f3ad7ef9e16c.png" width="20" height="20" /><a href=#高性能分布式训练与推理> 高性能分布式训练与推理 </a>
 
 
 ### 开箱即用的NLP能力
@@ -267,16 +267,16 @@ pip install --upgrade paddlenlp
 
 - 一键预测
 
-PaddleNLP提供[一键预测功能](./docs/model_zoo/taskflow.md)，无需训练，直接输入数据，即可得到预测结果，以情感分析任务为例：
+PaddleNLP提供[一键预测功能](./docs/model_zoo/taskflow.md)，无需训练，直接输入数据，即可得到预测结果：
 
 ```python
-from pprint import pprint
-from paddlenlp import Taskflow
+>>> from pprint import pprint
+>>> from paddlenlp import Taskflow
 
-schema = ['时间', '选手', '赛事名称'] # Define the schema for entity extraction
-ie = Taskflow('information_extraction', schema=schema)
-pprint(ie("2月8日上午北京冬奥会自由式滑雪女子大跳台决赛中中国选手谷爱凌以188.25分获得金牌！"))
->>> [{'时间': [{'end': 6,
+>>> schema = ['时间', '选手', '赛事名称'] # Define the schema for entity extraction
+>>> ie = Taskflow('information_extraction', schema=schema)
+>>> pprint(ie("2月8日上午北京冬奥会自由式滑雪女子大跳台决赛中中国选手谷爱凌以188.25分获得金牌！"))
+[{'时间': [{'end': 6,
           'probability': 0.9857378532924486,
           'start': 0,
           'text': '2月8日上午'}],
@@ -292,13 +292,7 @@ pprint(ie("2月8日上午北京冬奥会自由式滑雪女子大跳台决赛中
 
 - 定制训练
 
-如果对一键预测效果不满意，也可以进行模型微调，这里对UIE模型进行微调，以进一步提升命名实体识别的准确率：
-
-```python
-from paddlenlp.transformers import ErniePretrainedModel，AutoTokenizer
-```
-
-完整微调代码，可参考[UIE微调](./model_zoo/uie/)
+如果对一键预测效果不满意，也可以使用少量数据进行模型微调，进一步提升模型在特定场景的效果，详见[UIE小样本定制训练](./model_zoo/uie/)。
 
 更多内容可参考：[多场景示例](./examples)，[PaddleNLP on AI Studio](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/574995)。
 
@@ -309,7 +303,7 @@ PaddleNLP提供全流程的文本领域API，可大幅提升NLP任务建模的
 
 - 支持丰富中文数据集加载的[Dataset API](https://paddlenlp.readthedocs.io/zh/latest/data_prepare/dataset_list.html)；
 - 灵活高效地完成数据预处理的[Data API](https://paddlenlp.readthedocs.io/zh/latest/source/paddlenlp.data.html)；
-- 提供500+预训练模型的[Transformers API](./docs/model_zoo/transformers.rst)。    
+- 提供500+预训练模型的[Transformers API](./docs/model_zoo/transformers.rst)。  
 
 更多使用方法请参考[API文档](https://paddlenlp.readthedocs.io/zh/latest/source/paddlenlp.data.html)。
 
diff --git a/README_en.md b/README_en.md
@@ -23,7 +23,7 @@
   <a href=#Installation> Installation </a> |
   <a href=#QuickStart> Quick Start </a> |
   <a href=#APIReference> API Reference </a> |
-  <a href=#Community> Community </a> 
+  <a href=#Community> Community </a>
 </h4>
 
 ## News  <img src="./docs/imgs/news_icon.png" width="40"/>
@@ -45,11 +45,11 @@ PaddleNLP is an easy-to-use and high performance NLP library with awesome pre-tr
 
 #### <img src="https://user-images.githubusercontent.com/11793384/168454776-2075cc68-9402-4b0b-8723-5be0a315ddc3.png" width="20" height="20" /><a href=#Off-the-shelf NLP Pre-built Task> Off-the-shelf NLP Pre-built Task </a>
 
-#### <img src="https://user-images.githubusercontent.com/11793384/168454751-f111d8b4-a16a-4e36-b9de-3af8a2f00714.png" width="20" height="20" /><a href=#Awesome Chinese Pre-trained Model Zoo> Awesome Chinese Pre-trained Model Zoo </a> 
+#### <img src="https://user-images.githubusercontent.com/11793384/168454751-f111d8b4-a16a-4e36-b9de-3af8a2f00714.png" width="20" height="20" /><a href=#Awesome Chinese Pre-trained Model Zoo> Awesome Chinese Pre-trained Model Zoo </a>
 
-#### <img src="https://user-images.githubusercontent.com/11793384/168454721-0ac49e17-22db-4074-ba20-940365daf9f6.png" width="20" height="20" /><a href=#Industrial End-to-end NLP System> Industrial End-to-end NLP System </a> 
+#### <img src="https://user-images.githubusercontent.com/11793384/168454721-0ac49e17-22db-4074-ba20-940365daf9f6.png" width="20" height="20" /><a href=#Industrial End-to-end NLP System> Industrial End-to-end NLP System </a>
 
-#### <img src="https://user-images.githubusercontent.com/11793384/168454587-8b5a0f63-3d4b-4339-be47-f3ad7ef9e16c.png" width="20" height="20" /><a href=#High Performance Distributed Training and Infernece> High Performance Distributed Training and Infernece </a> 
+#### <img src="https://user-images.githubusercontent.com/11793384/168454587-8b5a0f63-3d4b-4339-be47-f3ad7ef9e16c.png" width="20" height="20" /><a href=#High Performance Distributed Training and Infernece> High Performance Distributed Training and Infernece </a>
 
 
 ### Off-the-shelf NLP Pre-built Task
@@ -64,7 +64,7 @@ For more usage please refer to [Taskflow Docs](./docs/model_zoo/taskflow.md)。
 
 #### Comprehensive Chinese Transformer Models
 
-We provide 45+ network architectures and over 500+ pretrained models. Not only includes all the SOTA model like ERNIE, PLATO and SKEP released by Baidu, but also integrates most of the high quality Chinese pretrained model developed by other organizations. Use AutoModel API to **⚡FAST⚡** download pretrained mdoels of different architecture. We welcome all developers to contribute your Transformer models to PaddleNLP! 
+We provide 45+ network architectures and over 500+ pretrained models. Not only includes all the SOTA model like ERNIE, PLATO and SKEP released by Baidu, but also integrates most of the high quality Chinese pretrained model developed by other organizations. Use AutoModel API to **⚡FAST⚡** download pretrained models of different architecture. We welcome all developers to contribute your Transformer models to PaddleNLP!
 
 ```python
 from paddlenlp.transformers import *
@@ -152,7 +152,7 @@ For more pretrained model usage, please refer to [Transformer API Docs](./docs/m
 
 PaddleNLP provides rich application examples covering mainstream NLP task to help developers accelerate problem solving. You can find our powerful transformer [Model Zoo](./model_zoo), and wide-range NLP application [exmaples](./examples) with detailed instructions.
 
-Also you can run our interactive [Notebook tutorial](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/574995) on AI Studio, a powerful platform with **FREE** computing resource. 
+Also you can run our interactive [Notebook tutorial](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/574995) on AI Studio, a powerful platform with **FREE** computing resource.
 
 
 ### Industrial End-to-end System Cases
@@ -161,7 +161,7 @@ We provide high value scenarios including information extraction, semantic retri
 
 #### Speech Command Analysis
 
-Integrated ASR Model, Information Extraction, we provide a speech command analysis pipeline that show how to use PaddleNLP and PaddleSpeech to solve Speech + NLP real scenarios. 
+Integrated ASR Model, Information Extraction, we provide a speech command analysis pipeline that shows how to use PaddleNLP and PaddleSpeech to solve Speech + NLP real scenarios.
 
 <div align="center">
     <img src="https://user-images.githubusercontent.com/11793384/168412618-04897a47-79c9-4fe7-a054-5dc1f6a1f75c.png" width="500">
@@ -293,4 +293,4 @@ We have borrowed from Hugging Face's [Transformer](https://github.com/huggingfac
 
 ## License
 
-PaddleNLP is provided under the [Apache-2.0 License](./LICENSE).
+PaddleNLP is provided under the [Apache-2.0 License](./LICENSE).
diff --git a/examples/dependency_parsing/ddparser/data.py b/examples/dependency_parsing/ddparser/data.py
@@ -258,8 +258,8 @@ class BucketsSampler(object):
     def __init__(self, buckets, batch_size, shuffle=False):
         self.batch_size = batch_size
         self.shuffle = shuffle
-        self.sizes, self.buckets = zip(* [(size, bucket)
-                                          for size, bucket in buckets.items()])
+        self.sizes, self.buckets = zip(*[(size, bucket)
+                                         for size, bucket in buckets.items()])
         # The number of chunks in each bucket, which is clipped by range [1, len(bucket)]
         self.chunks = []
         for size, bucket in zip(self.sizes, self.buckets):
diff --git a/examples/text_to_sql/IGSQL/eval_scripts/evaluation.py b/examples/text_to_sql/IGSQL/eval_scripts/evaluation.py
@@ -542,8 +542,8 @@ def print_scores(scores, etype):
 
     print("{:20} {:20} {:20} {:20} {:20} {:20}".format("", *levels))
     counts = [scores[level]['count'] for level in levels]
-    print("{:20} {:<20d} {:<20d} {:<20d} {:<20d} {:<20d}".format("count",
-                                                                 *counts))
+    print("{:20} {:<20d} {:<20d} {:<20d} {:<20d} {:<20d}".format("count", *
+                                                                 counts))
 
     if etype in ["all", "exec"]:
         print(
diff --git a/examples/text_to_sql/IGSQL/eval_scripts/evaluation_sqa.py b/examples/text_to_sql/IGSQL/eval_scripts/evaluation_sqa.py
@@ -598,8 +598,8 @@ def print_scores(scores, etype):
 
     print("\n\n{:20} {:20} {:20} {:20} {:20} {:20}".format("", *turns))
     counts = [scores[turn]['count'] for turn in turns]
-    print("{:20} {:<20d} {:<20d} {:<20d} {:<20d} {:<20d}".format("count",
-                                                                 *counts))
+    print("{:20} {:<20d} {:<20d} {:<20d} {:<20d} {:<20d}".format("count", *
+                                                                 counts))
 
     if etype in ["all", "exec"]:
         print(
diff --git a/model_zoo/README.md b/model_zoo/README.md
@@ -1,3 +1,3 @@
-# PaddleNLP Selected Model Zoo 
+# PaddleNLP Selected Model Zoo
 
 本目录是飞桨PaddleNLP精选模型库，提供了高质量的预训练模型和端到端的全流程部署工具链。
diff --git a/paddlenlp/datasets/dataset.py b/paddlenlp/datasets/dataset.py
@@ -65,7 +65,7 @@ class DatasetTuple:
     def __init__(self, splits):
         self.identifier_map, identifiers = self._gen_identifier_map(splits)
         self.tuple_cls = namedtuple('datasets', identifiers)
-        self.tuple = self.tuple_cls(* [None for _ in splits])
+        self.tuple = self.tuple_cls(*[None for _ in splits])
 
     def __getitem__(self, key):
         if isinstance(key, (int, slice)):
diff --git a/paddlenlp/datasets/nlpcc13_evsam05_hit.py b/paddlenlp/datasets/nlpcc13_evsam05_hit.py
@@ -84,7 +84,7 @@ def _read(self, filename, split):
 
         for i, line in enumerate(lines):
             if not line:
-                values = list(zip(* [j.split('\t') for j in lines[start:i]]))
+                values = list(zip(*[j.split('\t') for j in lines[start:i]]))
                 if split == "test":
                     ID, FORM, LEMMA, CPOS, POS, FEATS, HEAD, DEPREL = values
                 else:
diff --git a/paddlenlp/datasets/nlpcc13_evsam05_thu.py b/paddlenlp/datasets/nlpcc13_evsam05_thu.py
@@ -82,7 +82,7 @@ def _read(self, filename, split):
 
         for i, line in enumerate(lines):
             if not line:
-                values = list(zip(* [j.split('\t') for j in lines[start:i]]))
+                values = list(zip(*[j.split('\t') for j in lines[start:i]]))
 
                 ID, FORM, LEMMA, CPOS, POS, FEATS, HEAD, DEPREL = values
                 if values:

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`		`-# PaddleNLP Selected Model Zoo`
	`1`	`+# PaddleNLP Selected Model Zoo`
`2`	`2`
`3`	`3`	`本目录是飞桨PaddleNLP精选模型库，提供了高质量的预训练模型和端到端的全流程部署工具链。`