Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ca41e8b

Browse files
wangruohui, awan-10, mrwyattii
authored
support trust_remote_code in inference test (deepspeedai#709)
* support trust_remote_code
* make trust_remote_code an argument

---------

Co-authored-by: Ammar Ahmad Awan <[email protected]>
Co-authored-by: Michael Wyatt <[email protected]>
1 parent 0d11c63 commit ca41e8b

3 files changed

Lines changed: 11 additions & 9 deletions

File tree

inference/huggingface/text-generation/arguments.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@
1717
parser.add_argument("--test_performance", action='store_true', help="enable latency, bandwidth, and throughout testing")
1818
parser.add_argument("--local_rank", type=int, default=int(os.getenv("LOCAL_RANK", "0")), help="local rank")
1919
parser.add_argument("--world_size", type=int, default=int(os.getenv("WORLD_SIZE", "1")), help="world_size")
20-
parser.add_argument("--test_hybrid_engine", action='store_true', help="enable hybrid engine testing")
20+
parser.add_argument("--test_hybrid_engine", action='store_true', help="enable hybrid engine testing")
21+
parser.add_argument("--trust_remote_code", action='store_true', help="Trust remote code for hugging face models")

inference/huggingface/text-generation/inference-test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
dtype=data_type,
2525
is_meta=args.use_meta_tensor,
2626
device=args.local_rank,
27-
checkpoint_path=args.checkpoint_path)
27+
checkpoint_path=args.checkpoint_path,
28+
trust_remote_code=args.trust_remote_code)
2829

2930
if args.local_rank == 0:
3031
print(f"initialization time: {(time.time()-t0) * 1000}ms")
@@ -51,7 +52,7 @@
5152
save_mp_checkpoint_path=args.save_mp_checkpoint_path,
5253
**ds_kwargs
5354
)
54-
55+
5556
if args.local_rank == 0:
5657
see_memory_usage("after init_inference", True)
5758

@@ -90,4 +91,3 @@
9091
print(f"\nin={i}\nout={o}\n{'-'*60}")
9192
if args.test_performance:
9293
Performance.print_perf_stats(map(lambda t: t / args.max_new_tokens, times), pipe.model.config, args.dtype, args.batch_size)
93-

inference/huggingface/text-generation/utils.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def __init__(self,
2121
dtype=torch.float16,
2222
is_meta=True,
2323
device=-1,
24-
checkpoint_path=None
24+
checkpoint_path=None,
25+
trust_remote_code=False,
2526
):
2627
self.model_name = model_name
2728
self.dtype = dtype
@@ -38,18 +39,18 @@ def __init__(self,
3839
# the Deepspeed team made these so it's super fast to load (~1 minute), rather than wait 10-20min loading time.
3940
self.tp_presharded_models = ["microsoft/bloom-deepspeed-inference-int8", "microsoft/bloom-deepspeed-inference-fp16"]
4041

41-
self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
42+
self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=trust_remote_code)
4243
self.tokenizer.pad_token = self.tokenizer.eos_token
4344

4445
if (is_meta):
4546
'''When meta tensors enabled, use checkpoints'''
46-
self.config = AutoConfig.from_pretrained(self.model_name)
47+
self.config = AutoConfig.from_pretrained(self.model_name, trust_remote_code=trust_remote_code)
4748
self.repo_root, self.checkpoints_json = self._generate_json(checkpoint_path)
4849

4950
with deepspeed.OnDevice(dtype=torch.float16, device="meta"):
50-
self.model = AutoModelForCausalLM.from_config(self.config)
51+
self.model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=trust_remote_code)
5152
else:
52-
self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
53+
self.model = AutoModelForCausalLM.from_pretrained(self.model_name, trust_remote_code=trust_remote_code)
5354

5455
self.model.eval()
5556

0 commit comments

Comments
 (0)