Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 09be170

Browse files
committed
Merge branch 'boyanl/debug-artifacts' into 'main'
dump debug artifacts when TileCompilerError is raised See merge request dl/tileir/cutile-python!32
2 parents a12c0d8 + 8cb1301 commit 09be170

File tree

5 files changed

+125
-23
lines changed

5 files changed

+125
-23
lines changed

src/cuda/tile/_compile.py

Lines changed: 80 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
from dataclasses import dataclass
6+
import datetime
67
import functools
78
from functools import cache
89
import logging
@@ -15,12 +16,17 @@
1516
import threading
1617
import traceback
1718
from typing import Callable, Optional
19+
import zipfile
1820

1921
from cuda.tile._ast2ir import get_function_ir
2022
from cuda.tile._cext import get_compute_capability, TileContext, default_tile_context
2123
from cuda.tile._compiler_options import CompilerOptions
2224
from cuda.tile._const_utils import get_constant_annotations
23-
from cuda.tile._exception import TileCompilerError, TileCompilerTimeoutError
25+
from cuda.tile._exception import (
26+
TileCompilerError,
27+
TileCompilerExecutionError,
28+
TileCompilerTimeoutError,
29+
)
2430
from cuda.tile._ir import ir
2531
from cuda.tile._passes.code_motion import hoist_loop_invariants
2632
from cuda.tile._passes.loop_split import split_loops
@@ -36,6 +42,7 @@
3642
from cuda.tile._passes.dce import dead_code_elimination_pass
3743
from cuda.tile._passes.token_order import token_order_pass
3844
from cuda.tile._ir2bytecode import generate_bytecode_for_kernel
45+
from cuda.tile._version import __version__ as cutile_version
3946
import cuda.tile._bytecode as bc
4047

4148

@@ -101,6 +108,38 @@ def _log_mlir(bytecode_buf):
101108
print(f"Lowering\n==== TILEIR MLIR module ====\n\n{text}", file=sys.stderr)
102109

103110

111+
def _compiler_crash_dump(func_ir,
                         bytecode_generator,
                         error_msg,
                         compiler_flags,
                         compiler_version):
    """Write a zip archive of debugging artifacts for a failed compilation.

    The archive contains three entries: the kernel bytecode regenerated with
    ``anonymize_debug_attr=True``, the textual form of ``func_ir`` rendered
    with ``include_loc=False``, and a ``debug_info.txt`` summarising the
    error message, the compiler flags, the compiler version and the cutile
    version. The archive is created relative to the current working
    directory and its absolute path is printed to stderr.

    Args:
        func_ir: Function IR; only ``qualname`` and ``to_string`` are used.
        bytecode_generator: Callable invoked as
            ``bytecode_generator(writer, anonymize_debug_attr=True)``.
        error_msg: Error text to record in ``debug_info.txt``.
        compiler_flags: Compiler flags to record in ``debug_info.txt``.
        compiler_version: Compiler version text; any falsy value is
            recorded as ``Unknown``.
    """
    debug_info = (
        f"error:\n{error_msg}\n\n"
        f"compiler flags:\n{compiler_flags}\n\n"
        f"compiler version:\n{compiler_version or 'Unknown'}\n\n"
        f"cutile version:\n{cutile_version}\n"
    )

    # Regenerate the bytecode instead of reusing the buffer that was handed to
    # the compiler, so that the dumped copy has anonymized debug attributes.
    bytecode_buf = bytearray()
    with bc.write_bytecode(num_functions=1, buf=bytecode_buf) as writer:
        bytecode_generator(writer, anonymize_debug_attr=True)

    qualname = func_ir.qualname
    artifacts = {
        f"{qualname}.bytecode": bytes(bytecode_buf),
        f"{qualname}.cutileir": f"{func_ir.to_string(include_loc=False)}\n",
        "debug_info.txt": debug_info,
    }

    # NOTE(review): presumably qualname mirrors a Python __qualname__, which
    # may contain characters such as "<" and ">" that some filesystems reject.
    # Keep only conservative characters in the on-disk archive name; ordinary
    # dotted identifiers pass through unchanged.
    safe_name = "".join(ch if ch.isalnum() or ch in "._-" else "_" for ch in qualname)
    timestamp = datetime.datetime.now().timestamp()
    zip_filename = os.path.abspath(f"crash_dump_{safe_name}_{timestamp}.zip")
    print(f"Dumping crash artifacts to {zip_filename}\n", file=sys.stderr)

    with zipfile.ZipFile(zip_filename, "w") as z:
        for filename, content in artifacts.items():
            z.writestr(filename, content)
141+
142+
104143
@global_compiler_lock
105144
def compile_tile(pyfunc,
106145
args,
@@ -115,9 +154,12 @@ def compile_tile(pyfunc,
115154

116155
sm_arch = get_sm_arch()
117156

157+
bytecode_generator = functools.partial(generate_bytecode_for_kernel,
158+
func_ir, compiler_options, sm_arch)
159+
118160
bytecode_buf = bytearray()
119161
with bc.write_bytecode(num_functions=1, buf=bytecode_buf) as writer:
120-
generate_bytecode_for_kernel(func_ir, compiler_options, sm_arch, writer)
162+
bytecode_generator(writer, anonymize_debug_attr=False)
121163

122164
if 'TILEIR' in context.config.log_keys:
123165
_log_mlir(bytecode_buf)
@@ -150,14 +192,21 @@ def compile_tile(pyfunc,
150192
print("Can't print MLIR because the internal extension is missing", file=sys.stderr)
151193

152194
# Compile MLIR module and generate cubin
153-
with tempfile.NamedTemporaryFile(suffix='.mlirbc', prefix=func_ir.qualname,
195+
with tempfile.NamedTemporaryFile(suffix='.bytecode', prefix=func_ir.qualname,
154196
dir=context.config.temp_dir, delete=False) as f:
155197
f.write(bytecode_buf)
156198
f.flush()
157-
cubin_file = compile_cubin(f.name,
158-
compiler_options,
159-
sm_arch,
160-
timeout_sec=context.config.compiler_timeout_sec)
199+
200+
try:
201+
cubin_file = compile_cubin(f.name, compiler_options, sm_arch,
202+
timeout_sec=context.config.compiler_timeout_sec)
203+
except TileCompilerError as e:
204+
if context.config.enable_crash_dump:
205+
_compiler_crash_dump(func_ir, bytecode_generator, e.message,
206+
e.compiler_flags, e.compiler_version)
207+
208+
raise e
209+
161210
return TileLibrary(func_ir.qualname, cubin_file, bytecode_buf, func_ir)
162211

163212

@@ -223,6 +272,15 @@ def _find_compiler_bin() -> tuple[str, str, str]:
223272
f"make sure it is available in $PATH or ${cuda_home_var}/bin")
224273

225274

275+
def _try_get_compiler_version(compiler_bin, *,
                              timeout_sec: Optional[float] = 10.0) -> Optional[str]:
    """Return the stdout of ``<compiler_bin> --version``, or None on failure.

    This is a best-effort probe used while building error reports, so it
    never raises: a missing binary, a non-zero exit status or an expired
    timeout all yield None.

    Args:
        compiler_bin: Path to the compiler executable (converted with str()).
        timeout_sec: Upper bound in seconds for the probe; None disables the
            bound. The probe also runs after the compiler has already
            exceeded its own timeout, so it must not be able to block
            indefinitely.

    Returns:
        The captured standard output as text, or None if the probe failed.
    """
    try:
        res = subprocess.run([str(compiler_bin), "--version"],
                             check=True, capture_output=True, text=True,
                             timeout=timeout_sec)
    except Exception:
        # Deliberate catch-all: failing to obtain the version must never mask
        # the compiler error that is being reported.
        return None
    return res.stdout
282+
283+
226284
@cache
227285
def get_sm_arch() -> str:
228286
major, minor = get_compute_capability()
@@ -237,30 +295,37 @@ def compile_cubin(
237295
compiler_bin, bin_path, ld_path = _find_compiler_bin()
238296
fname_cubin = Path(fname_bytecode).with_suffix(".cubin")
239297
compiler_hints = compiler_options.specialize_for_target(sm_arch)
298+
240299
command = [
241300
str(compiler_bin),
242301
str(fname_bytecode),
302+
"-o",
303+
str(fname_cubin),
304+
]
305+
306+
flags = [
243307
"--gpu-name",
244308
sm_arch,
245309
f"-O{compiler_hints.opt_level}",
246-
"-o",
247-
str(fname_cubin),
310+
"--lineinfo"
248311
]
249-
# compile with line info
250-
command.append("--lineinfo")
251-
logger.debug(f"Invoke tile compiler: {' '.join(command)}\n"
312+
313+
logger.debug(f"Invoke tile compiler: {' '.join(command + flags)}\n"
252314
f"LD_LIBRARY_PATH:{ld_path}\n"
253315
f"PATH:{bin_path}")
254316
try:
255317
env = os.environ.copy()
256318
env['LD_LIBRARY_PATH'] = ld_path
257319
env['PATH'] = bin_path
258-
subprocess.run(command, env=env, check=True, capture_output=True, timeout=timeout_sec)
320+
subprocess.run(command + flags, env=env, check=True, capture_output=True,
321+
timeout=timeout_sec)
259322
except subprocess.CalledProcessError as e:
260-
raise TileCompilerError(e.returncode, e.stderr.decode())
323+
raise TileCompilerExecutionError(e.returncode, e.stderr.decode(), ' '.join(flags),
324+
_try_get_compiler_version(compiler_bin))
261325
except subprocess.TimeoutExpired:
262326
message = (f"`tileiras` compiler exceeded timeout {timeout_sec}s. "
263327
"Using a smaller tile size may reduce compilation time.")
264-
raise TileCompilerTimeoutError(message)
328+
raise TileCompilerTimeoutError(message, ' '.join(flags),
329+
_try_get_compiler_version(compiler_bin))
265330

266331
return fname_cubin

src/cuda/tile/_context.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ class TileContextConfig:
1515
temp_dir: str
1616
log_keys: list[str]
1717
compiler_timeout_sec: Optional[int]
18+
enable_crash_dump: bool
1819

1920

2021
def init_context_config_from_env():
    """Build a TileContextConfig whose fields all come from the environment.

    Each field is filled in by its dedicated ``get_*_from_env`` helper, called
    in field order.
    """
    return TileContextConfig(
        temp_dir=get_temp_dir_from_env(),
        log_keys=get_log_keys_from_env(),
        compiler_timeout_sec=get_compile_timeout_from_env(),
        enable_crash_dump=get_enable_crash_dump_from_env(),
    )
2729

@@ -61,3 +63,9 @@ def get_temp_dir_from_env() -> str:
6163
dir = tempfile.mkdtemp()
6264
atexit.register(_clean_tmp_dir, dir)
6365
return dir
66+
67+
68+
def get_enable_crash_dump_from_env() -> bool:
    """Report whether crash dumps are enabled via CUDA_TILE_ENABLE_CRASH_DUMP.

    The variable is compared case-insensitively, ignoring surrounding
    whitespace, against "1", "true", "yes" and "on". Any other value, or an
    unset variable, disables crash dumps.
    """
    key = "CUDA_TILE_ENABLE_CRASH_DUMP"
    # Strip before comparing so values such as "1 " or "true\n" (easy to
    # produce from shell scripts and env files) are still recognised.
    env = os.environ.get(key, "0").strip().lower()
    return env in ("1", "true", "yes", "on")

src/cuda/tile/_debug.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import logging
56
import os
67

78
# Comma separated string for selective debug logging
@@ -36,3 +37,6 @@ def parse_cuda_tile_logs_env():
3637

3738
CUDA_TILE_TESTING_DISABLE_TOKEN_ORDER = (
3839
os.environ.get("CUDA_TILE_TESTING_DISABLE_TOKEN_ORDER", "0") == "1")
40+
41+
42+
logger = logging.getLogger(__name__)

src/cuda/tile/_exception.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,32 @@ def _parse_tileir_stderr(stderr: str) -> tuple[str, Optional[Loc]]:
156156

157157

158158
class TileCompilerError(TileInternalError):
    """Compiler error that also records how the compiler was invoked.

    Besides the message and location forwarded to TileInternalError, an
    instance stores the compiler flags and the compiler version (None when
    the version is not available) as attributes.
    """

    def __init__(self,
                 message: str,
                 loc: Loc,
                 compiler_flags: str,
                 compiler_version: Optional[str]):
        super().__init__(message, loc)
        # Kept as plain attributes so that error handlers can read them back.
        self.compiler_version = compiler_version
        self.compiler_flags = compiler_flags
167+
168+
169+
class TileCompilerExecutionError(TileCompilerError):
    """Compiler error built from a return code and the captured stderr text."""

    def __init__(self,
                 return_code: int,
                 stderr: str,
                 compiler_flags: str,
                 compiler_version: Optional[str]):
        # _parse_tileir_stderr yields the message and an optional location.
        parsed_message, parsed_loc = _parse_tileir_stderr(stderr)
        # Fall back to the unknown-location sentinel when none was parsed.
        location = _unknown_loc if parsed_loc is None else parsed_loc
        full_message = f"Return code {return_code}\n{parsed_message}"
        super().__init__(full_message, location, compiler_flags, compiler_version)
164180

165181

166-
class TileCompilerTimeoutError(TileCompilerError):
    """Compiler error for a timeout; always reported at the unknown location."""

    def __init__(self,
                 message: str,
                 compiler_flags: str,
                 compiler_version: Optional[str]):
        # No location is supplied by the caller, so the unknown-location
        # sentinel is passed to the base class.
        super().__init__(
            message,
            _unknown_loc,
            compiler_flags,
            compiler_version,
        )

src/cuda/tile/_ir2bytecode.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -466,10 +466,11 @@ def lower_scan(ctx: "BytecodeContext", x: bc.Value, input_ty: Type,
466466

467467

468468
class DebugAttrMap:
469-
def __init__(self, debug_attr_table: bc.DebugAttrTable, linkage_name: str):
469+
def __init__(self, debug_attr_table: bc.DebugAttrTable, linkage_name: str, anonymize: bool):
470470
self._subprogram_cache = {}
471471
self._debug_attr_table = debug_attr_table
472472
self._linkage_name = linkage_name
473+
self._anonymize = anonymize
473474

474475
def get_subprogram(self, pyfunc) -> bc.DebugAttrId:
475476
try:
@@ -495,6 +496,9 @@ def get_subprogram(self, pyfunc) -> bc.DebugAttrId:
495496
return ret
496497

497498
def get_debugattr(self, loc: Loc) -> bc.DebugAttrId:
499+
if self._anonymize:
500+
return bc.MISSING_DEBUG_ATTR_ID
501+
498502
subprogram = self.get_subprogram(loc.function)
499503
attr = self._debug_attr_table.loc(subprogram, loc.filename, loc.line, loc.col)
500504
if loc.call_site is not None:
@@ -809,7 +813,8 @@ def generate_bytecode_for_block(ctx: BytecodeContext, block: Block):
809813
def generate_bytecode_for_kernel(func_ir: Function,
810814
compiler_options: CompilerOptions,
811815
sm_arch: str,
812-
writer: bc.BytecodeWriter):
816+
writer: bc.BytecodeWriter,
817+
anonymize_debug_attr: bool):
813818
target_options = compiler_options.specialize_for_target(sm_arch)
814819
entry_hints = bc.EntryHints(num_cta_in_cga=target_options.num_ctas,
815820
occupancy=target_options.occupancy)
@@ -822,7 +827,7 @@ def generate_bytecode_for_kernel(func_ir: Function,
822827
param_type_ids.extend(typeid_tuple(writer.type_table, ty))
823828
param_offsets.append(len(param_type_ids))
824829

825-
debug_attr_map = DebugAttrMap(writer.debug_attr_table, func_ir.qualname)
830+
debug_attr_map = DebugAttrMap(writer.debug_attr_table, func_ir.qualname, anonymize_debug_attr)
826831
func_debug_attr = debug_attr_map.get_debugattr(func_ir.loc)
827832

828833
with writer.function(name=func_ir.qualname,

0 commit comments

Comments
 (0)