33# SPDX-License-Identifier: Apache-2.0
44
55from dataclasses import dataclass
6+ import datetime
67import functools
78from functools import cache
89import logging
1516import threading
1617import traceback
1718from typing import Callable , Optional
19+ import zipfile
1820
1921from cuda .tile ._ast2ir import get_function_ir
2022from cuda .tile ._cext import get_compute_capability , TileContext , default_tile_context
2123from cuda .tile ._compiler_options import CompilerOptions
2224from cuda .tile ._const_utils import get_constant_annotations
23- from cuda .tile ._exception import TileCompilerError , TileCompilerTimeoutError
25+ from cuda .tile ._exception import (
26+ TileCompilerError ,
27+ TileCompilerExecutionError ,
28+ TileCompilerTimeoutError ,
29+ )
2430from cuda .tile ._ir import ir
2531from cuda .tile ._passes .code_motion import hoist_loop_invariants
2632from cuda .tile ._passes .loop_split import split_loops
3642from cuda .tile ._passes .dce import dead_code_elimination_pass
3743from cuda .tile ._passes .token_order import token_order_pass
3844from cuda .tile ._ir2bytecode import generate_bytecode_for_kernel
45+ from cuda .tile ._version import __version__ as cutile_version
3946import cuda .tile ._bytecode as bc
4047
4148
@@ -101,6 +108,38 @@ def _log_mlir(bytecode_buf):
101108 print (f"Lowering\n ==== TILEIR MLIR module ====\n \n { text } " , file = sys .stderr )
102109
103110
def _compiler_crash_dump(func_ir,
                         bytecode_generator,
                         error_msg,
                         compiler_flags,
                         compiler_version):
    """Write a zip archive of artifacts describing a failed compilation.

    The archive is named ``crash_dump_<qualname>_<timestamp>.zip`` and is
    created relative to the current working directory. It contains:

    * ``<qualname>.bytecode`` -- bytecode regenerated with
      ``anonymize_debug_attr=True``;
    * ``<qualname>.cutileir`` -- ``func_ir.to_string(include_loc=False)``;
    * ``debug_info.txt`` -- the error message, compiler flags, compiler
      version and cutile version.

    Writing the archive is best-effort: an ``OSError`` while creating it is
    reported on stderr instead of being raised, so that a failed dump does
    not replace the compiler error the caller is about to report.

    Args:
        func_ir: IR of the kernel; ``qualname`` and ``to_string`` are used.
        bytecode_generator: Callable invoked as
            ``bytecode_generator(writer, anonymize_debug_attr=True)`` to emit
            the kernel into a fresh bytecode buffer.
        error_msg: Text stored under ``error:`` in ``debug_info.txt``.
        compiler_flags: Text stored under ``compiler flags:``.
        compiler_version: Text stored under ``compiler version:``; any falsy
            value (e.g. ``None``) is recorded as ``Unknown``.
    """
    debug_info = (
        f"error:\n{error_msg}\n\n"
        f"compiler flags:\n{compiler_flags}\n\n"
        f"compiler version:\n{compiler_version or 'Unknown'}\n\n"
        f"cutile version:\n{cutile_version}\n"
    )

    # Regenerate the bytecode with anonymized debug attributes rather than
    # reusing the buffer that was handed to the compiler.
    bytecode_buf = bytearray()
    with bc.write_bytecode(num_functions=1, buf=bytecode_buf) as writer:
        bytecode_generator(writer, anonymize_debug_attr=True)

    artifacts = {
        f"{func_ir.qualname}.bytecode": bytes(bytecode_buf),
        f"{func_ir.qualname}.cutileir": f"{func_ir.to_string(include_loc=False)}\n",
        "debug_info.txt": debug_info,
    }

    timestamp = datetime.datetime.now().timestamp()
    zip_filename = os.path.abspath(f"crash_dump_{func_ir.qualname}_{timestamp}.zip")
    print(f"Dumping crash artifacts to {zip_filename}\n", file=sys.stderr)

    try:
        with zipfile.ZipFile(zip_filename, "w") as z:
            for filename, content in artifacts.items():
                z.writestr(filename, content)
    except OSError as exc:
        # The dump is only a diagnostic aid; an unwritable directory or a
        # full disk must not hide the original compiler failure.
        print(f"Failed to write crash dump {zip_filename}: {exc}", file=sys.stderr)
142+
104143@global_compiler_lock
105144def compile_tile (pyfunc ,
106145 args ,
@@ -115,9 +154,12 @@ def compile_tile(pyfunc,
115154
116155 sm_arch = get_sm_arch ()
117156
157+ bytecode_generator = functools .partial (generate_bytecode_for_kernel ,
158+ func_ir , compiler_options , sm_arch )
159+
118160 bytecode_buf = bytearray ()
119161 with bc .write_bytecode (num_functions = 1 , buf = bytecode_buf ) as writer :
120- generate_bytecode_for_kernel ( func_ir , compiler_options , sm_arch , writer )
162+ bytecode_generator ( writer , anonymize_debug_attr = False )
121163
122164 if 'TILEIR' in context .config .log_keys :
123165 _log_mlir (bytecode_buf )
@@ -150,14 +192,21 @@ def compile_tile(pyfunc,
150192 print ("Can't print MLIR because the internal extension is missing" , file = sys .stderr )
151193
152194 # Compile MLIR module and generate cubin
153- with tempfile .NamedTemporaryFile (suffix = '.mlirbc ' , prefix = func_ir .qualname ,
195+ with tempfile .NamedTemporaryFile (suffix = '.bytecode ' , prefix = func_ir .qualname ,
154196 dir = context .config .temp_dir , delete = False ) as f :
155197 f .write (bytecode_buf )
156198 f .flush ()
157- cubin_file = compile_cubin (f .name ,
158- compiler_options ,
159- sm_arch ,
160- timeout_sec = context .config .compiler_timeout_sec )
199+
200+ try :
201+ cubin_file = compile_cubin (f .name , compiler_options , sm_arch ,
202+ timeout_sec = context .config .compiler_timeout_sec )
203+ except TileCompilerError as e :
204+ if context .config .enable_crash_dump :
205+ _compiler_crash_dump (func_ir , bytecode_generator , e .message ,
206+ e .compiler_flags , e .compiler_version )
207+
208+ raise e
209+
161210 return TileLibrary (func_ir .qualname , cubin_file , bytecode_buf , func_ir )
162211
163212
@@ -223,6 +272,15 @@ def _find_compiler_bin() -> tuple[str, str, str]:
223272 f"make sure it is available in $PATH or ${ cuda_home_var } /bin" )
224273
225274
def _try_get_compiler_version(compiler_bin, *, timeout_sec: float = 10.0) -> Optional[str]:
    """Return the output of ``<compiler_bin> --version``, or ``None`` on failure.

    This is a best-effort probe used while reporting a compiler error, so it
    never raises: a missing binary, a non-zero exit status, or a timeout all
    yield ``None``.

    Args:
        compiler_bin: Path to the compiler executable (converted with ``str``).
        timeout_sec: Upper bound in seconds for the version query. The probe
            is also run after the compiler itself has timed out, so it must
            not be allowed to block indefinitely.

    Returns:
        The captured standard output as text, or ``None`` if the query failed.
    """
    try:
        res = subprocess.run([str(compiler_bin), "--version"],
                             check=True, capture_output=True, text=True,
                             timeout=timeout_sec)
    except Exception:
        # Deliberately broad: version lookup must never mask the real error.
        return None
    return res.stdout
282+
283+
226284@cache
227285def get_sm_arch () -> str :
228286 major , minor = get_compute_capability ()
@@ -237,30 +295,37 @@ def compile_cubin(
237295 compiler_bin , bin_path , ld_path = _find_compiler_bin ()
238296 fname_cubin = Path (fname_bytecode ).with_suffix (".cubin" )
239297 compiler_hints = compiler_options .specialize_for_target (sm_arch )
298+
240299 command = [
241300 str (compiler_bin ),
242301 str (fname_bytecode ),
302+ "-o" ,
303+ str (fname_cubin ),
304+ ]
305+
306+ flags = [
243307 "--gpu-name" ,
244308 sm_arch ,
245309 f"-O{ compiler_hints .opt_level } " ,
246- "-o" ,
247- str (fname_cubin ),
310+ "--lineinfo"
248311 ]
249- # compile with line info
250- command .append ("--lineinfo" )
251- logger .debug (f"Invoke tile compiler: { ' ' .join (command )} \n "
312+
313+ logger .debug (f"Invoke tile compiler: { ' ' .join (command + flags )} \n "
252314 f"LD_LIBRARY_PATH:{ ld_path } \n "
253315 f"PATH:{ bin_path } " )
254316 try :
255317 env = os .environ .copy ()
256318 env ['LD_LIBRARY_PATH' ] = ld_path
257319 env ['PATH' ] = bin_path
258- subprocess .run (command , env = env , check = True , capture_output = True , timeout = timeout_sec )
320+ subprocess .run (command + flags , env = env , check = True , capture_output = True ,
321+ timeout = timeout_sec )
259322 except subprocess .CalledProcessError as e :
260- raise TileCompilerError (e .returncode , e .stderr .decode ())
323+ raise TileCompilerExecutionError (e .returncode , e .stderr .decode (), ' ' .join (flags ),
324+ _try_get_compiler_version (compiler_bin ))
261325 except subprocess .TimeoutExpired :
262326 message = (f"`tileiras` compiler exceeded timeout { timeout_sec } s. "
263327 "Using a smaller tile size may reduce compilation time." )
264- raise TileCompilerTimeoutError (message )
328+ raise TileCompilerTimeoutError (message , ' ' .join (flags ),
329+ _try_get_compiler_version (compiler_bin ))
265330
266331 return fname_cubin
0 commit comments