Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f415ec8

Browse files
xiaoxiawu-microsoft and Ubuntu
authored
Xiaoxia/fp v1 (deepspeedai#871)
* add FP6-benchmark * update * updatefile * update tpsize --------- Co-authored-by: Ubuntu <deepspeed@DS-A100-Largedisk.3xui22esprkudm2r30sfjzexrc.bx.internal.cloudapp.net>
1 parent 6e9ada6 commit f415ec8

3 files changed

Lines changed: 18 additions & 1 deletion

File tree

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# DeepSpeed Team
5+
6+
MODELS=(NousResearch/Llama-2-70b-hf)
7+
8+
for MODEL in ${MODELS[@]}; do
9+
python ./run_benchmark.py --model ${MODEL} --num_requests 128 --stream --backend fastgen --fp6 --tp_size 1
10+
done

benchmarks/inference/mii/src/server.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,17 @@ def start_fastgen_server(args: argparse.Namespace) -> None:
7171
inference_config = RaggedInferenceEngineConfig(
7272
tensor_parallel=tp_config, state_manager=mgr_config
7373
)
74-
74+
if args.fp6:
75+
quantization_mode = 'wf6af16'
76+
else:
77+
quantization_mode = None
7578
mii.serve(
7679
args.model,
7780
deployment_name=args.deployment_name,
7881
tensor_parallel=args.tp_size,
7982
inference_engine_config=inference_config,
8083
replica_num=args.num_replicas,
84+
quantization_mode=quantization_mode
8185
)
8286

8387

benchmarks/inference/mii/src/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ def parse_args(
159159
parser.add_argument(
160160
"--overwrite_results", action="store_true", help="Overwrite existing results"
161161
)
162+
parser.add_argument(
163+
"--fp6", action="store_true", help="Enable FP6"
164+
)
162165

163166
# Parse arguments
164167
args = parser.parse_args()

0 commit comments

Comments
 (0)