Xiaoxia/fp v1 (deepspeedai#871)

xiaoxiawu-microsoft · Ubuntu · web-flow · commit f415ec82cb22 · 2024-03-07T20:00:04.000-08:00
* add FP6-benchmark

* update

* updatefile

* update tpsize

---------

Co-authored-by: Ubuntu <deepspeed@DS-A100-Largedisk.3xui22esprkudm2r30sfjzexrc.bx.internal.cloudapp.net>
diff --git a/benchmarks/inference/mii/run_fp6.sh b/benchmarks/inference/mii/run_fp6.sh
@@ -0,0 +1,10 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+
+# Models to benchmark on the FastGen backend with --fp6 enabled.
+MODELS=(NousResearch/Llama-2-70b-hf)
+for MODEL in "${MODELS[@]}"; do
+    python ./run_benchmark.py --model "${MODEL}" --num_requests 128 --stream --backend fastgen --fp6 --tp_size 1
+done
diff --git a/benchmarks/inference/mii/src/server.py b/benchmarks/inference/mii/src/server.py
@@ -71,13 +71,17 @@ def start_fastgen_server(args: argparse.Namespace) -> None:
     inference_config = RaggedInferenceEngineConfig(
         tensor_parallel=tp_config, state_manager=mgr_config
     )
-
+    if args.fp6:
+        quantization_mode = 'wf6af16'
+    else:
+        quantization_mode = None
     mii.serve(
         args.model,
         deployment_name=args.deployment_name,
         tensor_parallel=args.tp_size,
         inference_engine_config=inference_config,
         replica_num=args.num_replicas,
+        quantization_mode=quantization_mode
     )
 
 
diff --git a/benchmarks/inference/mii/src/utils.py b/benchmarks/inference/mii/src/utils.py
@@ -159,6 +159,9 @@ def parse_args(
     parser.add_argument(
         "--overwrite_results", action="store_true", help="Overwrite existing results"
     )
+    parser.add_argument(
+        "--fp6", action="store_true", help="Enable FP6"
+    )
 
     # Parse arguments
     args = parser.parse_args()

Original file line number	Diff line number	Diff line change
`@@ -159,6 +159,9 @@ def parse_args(`
`159`	`159`	`parser.add_argument(`
`160`	`160`	`"--overwrite_results", action="store_true", help="Overwrite existing results"`
`161`	`161`	`)`
	`162`	`+ parser.add_argument(`
	`163`	`+ "--fp6", action="store_true", help="Enable FP6"`
	`164`	`+ )`
`162`	`165`
`163`	`166`	`# Parse arguments`
`164`	`167`	`args = parser.parse_args()`