PaddlePaddle · TeslaZhao · Nov 16, 2021 · Nov 16, 2021 · Nov 16, 2021 · Nov 16, 2021
diff --git a/doc/Latest_Packages_CN.md b/doc/Latest_Packages_CN.md
@@ -80,7 +80,7 @@ https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-0.0.0.tar.gz
 # Cuda 10.2 + Cudnn 8
 https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-1028-0.0.0.tar.gz
 # Cuda 11.2
-https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-cuda112-0.0.0.tar.gz
+https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-112-0.0.0.tar.gz
 ```
 
 #### How to setup SERVING_BIN offline?

diff --git a/doc/Run_On_Kubernetes_CN.md b/doc/Run_On_Kubernetes_CN.md
@@ -25,10 +25,11 @@ kubectl apply -f https://bit.ly/kong-ingress-dbless
 在`tools/generate_runtime_docker.sh`文件下，它的使用方式如下
 
 ```bash
-bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --serving 0.6.0 --paddle 2.0.1 --name serving_runtime:cuda10.1-py36
+bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --image_name serving_runtime:cuda10.1-py36
 ```
 
-会生成 cuda10.1，python 3.6，serving版本0.6.0 还有 paddle版本2.0.1的运行镜像。如果有其他疑问，可以执行下列语句得到帮助信息。
+该命令会生成基于 cuda10.1、python 3.6、Serving 0.7.0 以及 Paddle 2.2.0 的运行镜像。如果有其他疑问，可以执行下列语句得到帮助信息。
+如果您需要老版本Serving运行镜像，请checkout到老版本分支。
 
 ```
 bash tools/generate_runtime_docker.sh --help
@@ -83,8 +84,8 @@ python3.6 web_service.py
 web service模式本质上和pipeline模式类似，因此我们以`Serving/examples/C++/PaddleNLP/bert`为例
 
 ```bash
-#假设您已经拥有Serving运行镜像，假设镜像名为registry.baidubce.com/paddlepaddle/serving:0.6.0-cuda10.2-py36
-docker run --rm -dit --name webservice_serving_demo registry.baidubce.com/paddlepaddle/serving:0.6.0-cpu-py36 bash
+#假设您已经拥有Serving运行镜像，假设镜像名为registry.baidubce.com/paddlepaddle/serving:0.7.0-cpu-py36
+docker run --rm -dit --name webservice_serving_demo registry.baidubce.com/paddlepaddle/serving:0.7.0-cpu-py36 bash
 cd Serving/examples/C++/PaddleNLP/bert
 ### download model 
 wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz

diff --git a/tools/dockerfiles/build_scripts/install_trt.sh b/tools/dockerfiles/build_scripts/install_trt.sh
@@ -24,7 +24,7 @@ if [[ "$VERSION" == "cuda10.1" ]];then
   rm TensorRT6-cuda10.1-cudnn7.tar.gz
 elif [[ "$VERSION" == "cuda11.2" ]];then
   wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz --no-check-certificate
-  tar -zxf TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz
+  tar -zxf TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz -C /usr/local
   cp -rf /usr/local/TensorRT-8.0.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-8.0.3.4/lib/* /usr/lib/
   rm -rf TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz
 elif [[ "$VERSION" == "cuda10.2" ]];then

diff --git a/tools/dockerfiles/build_scripts/install_whl.sh b/tools/dockerfiles/build_scripts/install_whl.sh
@@ -20,7 +20,7 @@ RUN_ENV=$3 # cpu/10.1 10.2
 PYTHON_VERSION=$4
 serving_release=
 client_release="paddle-serving-client==$SERVING_VERSION"
-app_release="paddle-serving-app==0.3.1"
+app_release="paddle-serving-app==$SERVING_VERSION"
 
 if [[ $PYTHON_VERSION == "3.6" ]];then
     CPYTHON="36"
@@ -33,48 +33,28 @@ elif [[ $PYTHON_VERSION == "3.8" ]];then
     CPYTHON_PADDLE="38"
 fi
 
-if [[ $SERVING_VERSION == "0.5.0" ]]; then
-    if [[ "$RUN_ENV" == "cpu" ]];then
-        server_release="paddle-serving-server==$SERVING_VERSION"
-        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-cpu-avx-mkl-${SERVING_VERSION}.tar.gz"
-        paddle_whl="https://paddle-wheel.bj.bcebos.com/$PADDLE_VERSION-cpu-avx-mkl/paddlepaddle-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
-    elif [[ "$RUN_ENV" == "cuda10.1" ]];then
-        server_release="paddle-serving-server-gpu==$SERVING_VERSION.post101"
-        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-101-${SERVING_VERSION}.tar.gz"
-        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post101-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
-    elif [[ "$RUN_ENV" == "cuda10.2" ]];then
-        server_release="paddle-serving-server-gpu==$SERVING_VERSION.post102"
-        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-102-${SERVING_VERSION}.tar.gz"
-        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.2-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
-    elif [[ "$RUN_ENV" == "cuda11" ]];then
-        server_release="paddle-serving-server-gpu==$SERVING_VERSION.post11"
-        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-cuda11-${SERVING_VERSION}.tar.gz"
-        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda11.0-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post110-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
-    fi
-    client_release="paddle-serving-client==$SERVING_VERSION"
-    app_release="paddle-serving-app==0.3.1"
-else 
-    if [[ "$RUN_ENV" == "cpu" ]];then
-        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-$SERVING_VERSION-py3-none-any.whl"
-        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-mkl-$SERVING_VERSION.tar.gz"
-        paddle_whl="https://paddle-wheel.bj.bcebos.com/$PADDLE_VERSION-cpu-avx-mkl/paddlepaddle-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
-    elif [[ "$RUN_ENV" == "cuda10.1" ]];then
-        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post101-py3-none-any.whl"
-        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-101-$SERVING_VERSION.tar.gz"
-        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post101-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
-    elif [[ "$RUN_ENV" == "cuda10.2" ]];then
-        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post102-py3-none-any.whl"
-        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-$SERVING_VERSION.tar.gz"
-        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.2-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
-    elif [[ "$RUN_ENV" == "cuda11" ]];then
-        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post11-py3-none-any.whl"
-        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-cuda11-$SERVING_VERSION.tar.gz"
-        paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda11.0-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post110-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
-    fi
-    client_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-$SERVING_VERSION-cp$CPYTHON-none-any.whl"
-    app_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-$SERVING_VERSION-py3-none-any.whl"
+if [[ "$RUN_ENV" == "cpu" ]];then
+  server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-$SERVING_VERSION-py3-none-any.whl"
+  serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-mkl-$SERVING_VERSION.tar.gz"
+  paddle_whl="paddlepaddle==$PADDLE_VERSION"
+elif [[ "$RUN_ENV" == "cuda10.1" ]];then
+  server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post101-py3-none-any.whl"
+  serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-101-$SERVING_VERSION.tar.gz"
+  paddle_whl="https://paddle-inference-lib.bj.bcebos.com/$PADDLE_VERSION/python/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5/paddlepaddle_gpu-$PADDLE_VERSION.post101-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
+elif [[ "$RUN_ENV" == "cuda10.2" ]] ;then
+  server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post1028-py3-none-any.whl"
+  serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-1028-$SERVING_VERSION.tar.gz"
+  paddle_whl="https://paddle-inference-lib.bj.bcebos.com/$PADDLE_VERSION/python/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda10.2_cudnn8.1.1_trt7.2.3.4/paddlepaddle_gpu-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
+elif [[ "$RUN_ENV" == "cuda11.2" ]];then
+  server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post112-py3-none-any.whl"
+  serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-112-$SERVING_VERSION.tar.gz"
+  paddle_whl="https://paddle-inference-lib.bj.bcebos.com/$PADDLE_VERSION/python/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda11.2_cudnn8.2.1_trt8.0.3.4/paddlepaddle_gpu-$PADDLE_VERSION.post112-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl"
 fi
 
+client_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-$SERVING_VERSION-cp$CPYTHON-none-any.whl"
+app_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-$SERVING_VERSION-py3-none-any.whl"
+
+
 if [[ "$RUN_ENV" == "cpu" ]];then
     python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
     python$PYTHON_VERSION -m pip install $paddle_whl
@@ -105,15 +85,15 @@ elif [[ "$RUN_ENV" == "cuda10.2" ]];then
     echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
     rm -rf serving-gpu-102-${SERVING_VERSION}.tar.gz
     cd -
-elif [[ "$RUN_ENV" == "cuda11" ]];then
+elif [[ "$RUN_ENV" == "cuda11.2" ]];then
     python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
     python$PYTHON_VERSION -m pip install $paddle_whl
     cd /usr/local/
     wget $serving_bin
-    tar xf serving-gpu-cuda11-${SERVING_VERSION}.tar.gz
-    mv $PWD/serving-gpu-cuda11-${SERVING_VERSION} $PWD/serving_bin
+    tar xf serving-gpu-112-${SERVING_VERSION}.tar.gz
+    mv $PWD/serving-gpu-112-${SERVING_VERSION} $PWD/serving_bin
     echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
-    rm -rf serving-gpu-cuda11-${SERVING_VERSION}.tar.gz
+    rm -rf serving-gpu-112-${SERVING_VERSION}.tar.gz
     cd -
 fi
 

diff --git a/tools/generate_runtime_docker.sh b/tools/generate_runtime_docker.sh
@@ -7,10 +7,10 @@ function usage
 {
     echo "usage: sh tools/generate_runtime_docker.sh --SOME_ARG ARG_VALUE"
     echo "   ";
-    echo "   --env                 : running env, cpu/cuda10.1/cuda10.2/cuda11";
+    echo "   --env                 : running env, cpu/cuda10.1/cuda10.2/cuda11.2";
     echo "   --python              : python version, 3.6/3.7/3.8 ";
-    echo "   --serving             : serving version(0.6.0)";
-    echo "   --paddle              : paddle version(2.1.0)"
+    #echo "   --serving             : serving version(0.6.0/0.6.2)";
+    #echo "   --paddle              : paddle version(2.1.0/2.2.0)"
     echo "   --image_name          : image name(default serving_runtime:env-python)"
     echo "  -h | --help            : helper";
 }
@@ -25,8 +25,8 @@ function parse_args
       case "$1" in
           --env )               env="$2";             shift;;
           --python )            python="$2";     shift;;
-          --serving )           serving="$2";      shift;;
-          --paddle )            paddle="$2";      shift;;
+          #--serving )           serving="$2";      shift;;
+          #--paddle )            paddle="$2";      shift;;
       --image_name )          image_name="$2";    shift;;
           -h | --help )         usage;            exit;; # quit and show usage
           * )                 args+=("$1")             # if no match, add it to the positional args
@@ -66,9 +66,11 @@ function run
       base_image="nvidia\/cuda:10.1-cudnn7-runtime-ubuntu16.04"
   elif [ $env == "cuda10.2" ]; then
       base_image="nvidia\/cuda:10.2-cudnn8-runtime-ubuntu16.04"
-  elif [ $env == "cuda11" ]; then
-      base_image="nvidia\/cuda:11.0.3-cudnn8-runtime-ubuntu16.04"
+  elif [ $env == "cuda11.2" ]; then
+      base_image="nvidia\/cuda:11.2.0-cudnn8-runtime-ubuntu16.04"
   fi
+  paddle="2.2.0"   # pinned Paddle release (--paddle is disabled); must not overwrite $python set by --python
+  serving="0.7.0"  # pinned Serving release (--serving is disabled)
   echo "base image: $base_image"
   echo "named arg: python: $python"
   echo "named arg: serving: $serving"