2323from mypyc .irbuild .prepare import load_type_map
2424from mypyc .irbuild .mapper import Mapper
2525from mypyc .common import (
26- PREFIX , TOP_LEVEL_NAME , INT_PREFIX , MODULE_PREFIX , RUNTIME_C_FILES , USE_FASTCALL ,
26+ PREFIX , TOP_LEVEL_NAME , MODULE_PREFIX , RUNTIME_C_FILES , USE_FASTCALL ,
2727 USE_VECTORCALL , shared_lib_name ,
2828)
29- from mypyc .codegen .cstring import encode_as_c_string , encode_bytes_as_c_string
29+ from mypyc .codegen .cstring import c_string_initializer
30+ from mypyc .codegen .literals import Literals
3031from mypyc .codegen .emit import EmitterContext , Emitter , HeaderDeclaration
3132from mypyc .codegen .emitfunc import generate_native_function , native_function_header
3233from mypyc .codegen .emitclass import generate_class_type_decl , generate_class
3334from mypyc .codegen .emitwrapper import (
3435 generate_wrapper_function , wrapper_function_header ,
3536 generate_legacy_wrapper_function , legacy_wrapper_function_header ,
3637)
37- from mypyc .ir .ops import LiteralsMap , DeserMaps
38+ from mypyc .ir .ops import DeserMaps , LoadLiteral
3839from mypyc .ir .rtypes import RType , RTuple
3940from mypyc .ir .func_ir import FuncIR
4041from mypyc .ir .class_ir import ClassIR
@@ -286,9 +287,8 @@ def compile_ir_to_c(
286287 if not group_modules :
287288 ctext [group_name ] = []
288289 continue
289- literals = mapper .literals [group_name ]
290290 generator = GroupGenerator (
291- literals , group_modules , source_paths ,
291+ group_modules , source_paths ,
292292 group_name , mapper .group_map , names ,
293293 compiler_options
294294 )
@@ -447,7 +447,6 @@ def group_dir(group_name: str) -> str:
447447
448448class GroupGenerator :
449449 def __init__ (self ,
450- literals : LiteralsMap ,
451450 modules : List [Tuple [str , ModuleIR ]],
452451 source_paths : Dict [str , str ],
453452 group_name : Optional [str ],
@@ -461,7 +460,6 @@ def __init__(self,
461460 one .c file per module if in multi_file mode.)
462461
463462 Arguments:
464- literals: The literals declared in this group
465463 modules: (name, ir) pairs for each module in the group
466464 source_paths: Map from module names to source file paths
467465 group_name: The name of the group (or None if this is single-module compilation)
@@ -470,7 +468,6 @@ def __init__(self,
470468 multi_file: Whether to put each module in its own source file regardless
471469 of group structure.
472470 """
473- self .literals = literals
474471 self .modules = modules
475472 self .source_paths = source_paths
476473 self .context = EmitterContext (names , group_name , group_map )
@@ -495,6 +492,11 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
495492 file_contents = []
496493 multi_file = self .use_shared_lib and self .multi_file
497494
495+ # Collect all literal refs in IR.
496+ for _ , module in self .modules :
497+ for fn in module .functions :
498+ collect_literals (fn , self .context .literals )
499+
498500 base_emitter = Emitter (self .context )
499501 # Optionally just include the runtime library c files to
500502 # reduce the number of compiler invocations needed
@@ -505,12 +507,7 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
505507 base_emitter .emit_line ('#include "__native_internal{}.h"' .format (self .short_group_suffix ))
506508 emitter = base_emitter
507509
508- for (_ , literal ), identifier in self .literals .items ():
509- if isinstance (literal , int ):
510- symbol = emitter .static_name (identifier , None )
511- self .declare_global ('CPyTagged ' , symbol )
512- else :
513- self .declare_static_pyobject (identifier , emitter )
510+ self .generate_literal_tables ()
514511
515512 for module_name , module in self .modules :
516513 if multi_file :
@@ -621,6 +618,32 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
621618 '' .join (ext_declarations .fragments )),
622619 ]
623620
def generate_literal_tables(self) -> None:
    """Declare the C globals that describe and hold the Python literals.

    All constructed literal objects, whatever their type, live in one
    array (CPyStatics), so any literal can be referred to by its index.
    Alongside it, one table per literal kind is declared, each with an
    initializer built from the encoded values reported by the literal
    collection of the emitter context.
    """
    lits = self.context.literals

    # Storage for the constructed objects; it is filled in during
    # module initialization.
    statics_type = 'PyObject *[%d]' % lits.num_literals()
    self.declare_global(statics_type, 'CPyStatics')

    # Descriptions of str, bytes and int literals are arrays of C strings.
    for table_name, encoded_values in (
        ('CPyLit_Str', lits.encoded_str_values),
        ('CPyLit_Bytes', lits.encoded_bytes_values),
        ('CPyLit_Int', lits.encoded_int_values),
    ):
        self.declare_global(
            'const char * const []',
            table_name,
            initializer=c_string_array_initializer(encoded_values()),
        )

    # Descriptions of float and complex literals are arrays of doubles.
    for table_name, encoded_values in (
        ('CPyLit_Float', lits.encoded_float_values),
        ('CPyLit_Complex', lits.encoded_complex_values),
    ):
        self.declare_global(
            'const double []',
            table_name,
            initializer=c_array_initializer(encoded_values()),
        )
624647 def generate_export_table (self , decl_emitter : Emitter , code_emitter : Emitter ) -> None :
625648 """Generate the declaration and definition of the group's export struct.
626649
@@ -793,46 +816,10 @@ def generate_globals_init(self, emitter: Emitter) -> None:
793816 for symbol , fixup in self .simple_inits :
794817 emitter .emit_line ('{} = {};' .format (symbol , fixup ))
795818
796- for (_ , literal ), identifier in self .literals .items ():
797- symbol = emitter .static_name (identifier , None )
798- if isinstance (literal , int ):
799- actual_symbol = symbol
800- symbol = INT_PREFIX + symbol
801- emitter .emit_line (
802- 'PyObject * {} = PyLong_FromString(\" {}\" , NULL, 10);' .format (
803- symbol , str (literal ))
804- )
805- elif isinstance (literal , float ):
806- emitter .emit_line (
807- '{} = PyFloat_FromDouble({});' .format (symbol , str (literal ))
808- )
809- elif isinstance (literal , complex ):
810- emitter .emit_line (
811- '{} = PyComplex_FromDoubles({}, {});' .format (
812- symbol , str (literal .real ), str (literal .imag ))
813- )
814- elif isinstance (literal , str ):
815- emitter .emit_line (
816- '{} = PyUnicode_FromStringAndSize({}, {});' .format (
817- symbol , * encode_as_c_string (literal ))
818- )
819- elif isinstance (literal , bytes ):
820- emitter .emit_line (
821- '{} = PyBytes_FromStringAndSize({}, {});' .format (
822- symbol , * encode_bytes_as_c_string (literal ))
823- )
824- else :
825- assert False , ('Literals must be integers, floating point numbers, or strings,' ,
826- 'but the provided literal is of type {}' .format (type (literal )))
827- emitter .emit_lines ('if (unlikely({} == NULL))' .format (symbol ),
828- ' return -1;' )
829- # Ints have an unboxed representation.
830- if isinstance (literal , int ):
831- emitter .emit_line (
832- '{} = CPyTagged_FromObject({});' .format (actual_symbol , symbol )
833- )
834- elif isinstance (literal , str ):
835- emitter .emit_line ('PyUnicode_InternInPlace(&{});' .format (symbol ))
819+ values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex'
820+ emitter .emit_lines ('if (CPyStatics_Initialize(CPyStatics, {}) < 0) {{' .format (values ),
821+ 'return -1;' ,
822+ '}' )
836823
837824 emitter .emit_lines (
838825 'is_initialized = 1;' ,
@@ -974,13 +961,19 @@ def _toposort_visit(name: str) -> None:
def declare_global(self, type_spaced: str, name: str,
                   *,
                   initializer: Optional[str] = None) -> None:
    """Register a header declaration for a C global variable.

    Nothing is registered if self.context.declarations already has an
    entry for name.

    Arguments:
        type_spaced: C type text that is placed directly before the name.
            If it contains '[', the name is inserted just before the
            first '[' so that the array suffix follows the name.
        name: Name of the global; also the key used in
            self.context.declarations.
        initializer: Optional C initializer text. When it is non-empty,
            a definition of the form "<declarator> = <initializer>;" is
            attached to the declaration.
    """
    # Splice the name in front of the array suffix, if there is one.
    # Without a '[' both trailing parts are empty and the name simply
    # follows the type.
    type_part, bracket, array_rest = type_spaced.partition('[')
    declarator = type_part + name + bracket + array_rest

    if name in self.context.declarations:
        return

    definition = None
    if initializer:
        definition = ['{} = {};'.format(declarator, initializer)]
    self.context.declarations[name] = HeaderDeclaration(
        '{};'.format(declarator),
        defn=definition,
    )
986979
@@ -1080,3 +1073,55 @@ def is_fastcall_supported(fn: FuncIR) -> bool:
10801073 # TODO: Support fastcall for __init__.
10811074 return USE_FASTCALL and fn .name != '__init__'
10821075 return USE_FASTCALL
1076+
1077+
def collect_literals(fn: FuncIR, literals: Literals) -> None:
    """Record the value of every LoadLiteral op found in fn.

    Every op of every basic block of fn is visited, in order, and the
    value of each LoadLiteral op is passed to literals.record_literal.

    This must only be run once the IR is final, so that literals which
    have been optimized away are not recorded.
    """
    literal_loads = (
        op
        for block in fn.blocks
        for op in block.ops
        if isinstance(op, LoadLiteral)
    )
    for load in literal_loads:
        literals.record_literal(load.value)
1088+
1089+
def c_array_initializer(components: List[str]) -> str:
    """Build the brace-enclosed initializer text for a C array variable.

    Each component is a C expression that is valid inside an initializer.
    For instance, the components ["1", "2"] produce "{1, 2}", which can
    be used as follows:

        int a[] = {1, 2};

    Components are packed onto a row for as long as the running width
    stays below 70; a component that does not fit starts a new row. If
    everything fits on one row the result is a single line, otherwise
    the rows are placed on separate lines.
    """
    finished_rows = []  # type: List[str]
    row = []  # type: List[str]
    row_width = 0
    for item in components:
        fits = row_width + 2 + len(item) < 70
        if row and not fits:
            # Close the current row and start a new one with this item.
            finished_rows.append(', '.join(row))
            row = [item]
            row_width = len(item)
            continue
        # An empty row always accepts the item, however long it is.
        row.append(item)
        row_width += len(item) + 2

    last_row = ', '.join(row)
    if finished_rows:
        # Multi-line result
        finished_rows.append(last_row)
        return '{\n ' + ',\n '.join(finished_rows) + '\n }'
    # Result fits on a single line
    return '{%s}' % last_row
1119+
1120+
def c_string_array_initializer(components: List[bytes]) -> str:
    """Build the initializer text for a C array of string literals.

    Each item of components is converted with c_string_initializer and
    followed by a comma and a line break; the converted items are placed
    between an opening '{' line and a closing '}'.
    """
    rows = [' ' + c_string_initializer(item) + ',\n ' for item in components]
    return '{\n ' + ''.join(rows) + '}'
0 commit comments