@@ -953,7 +953,7 @@ def __init__(
953953 # until all the files have been added. This means that a
954954 # new file can be processed O(n**2) times. This cache
955955 # avoids most of this redundant work.
956- self .ast_cache : dict [str , tuple [MypyFile , list [ErrorInfo ]]] = {}
956+ self .ast_cache : dict [str , tuple [MypyFile , list [ErrorInfo ], str | None ]] = {}
957957 # Number of times we used GC optimization hack for fresh SCCs.
958958 self .gc_freeze_cycles = 0
959959 # Mapping from SCC id to corresponding SCC instance. This is populated
@@ -1039,11 +1039,66 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
10391039 as an optimization to parallelize only those parts of the code that can be
10401040 parallelized efficiently.
10411041 """
1042+ parallel_parsed_states , parallel_parsed_states_set = self .parse_files_threaded_raw (
1043+ sequential_states , parallel_states
1044+ )
1045+
1046+ for state in parallel_parsed_states :
1047+ # New parser returns serialized ASTs. Deserialize full trees only if not using
1048+ # parallel workers.
1049+ with state .wrap_context ():
1050+ assert state .tree is not None
1051+ raw_data = state .tree .raw_data
1052+ if raw_data is not None :
1053+ # Apply inline mypy config before deserialization, since
1054+ # some options (e.g. implicit_optional) affect deserialization
1055+ state .source_hash = raw_data .source_hash
1056+ state .apply_inline_configuration (raw_data .mypy_comments )
1057+ state .tree = load_from_raw (
1058+ state .xpath ,
1059+ state .id ,
1060+ raw_data ,
1061+ self .errors ,
1062+ state .options ,
1063+ imports_only = bool (self .workers ),
1064+ )
1065+ if self .errors .is_blockers ():
1066+ self .log ("Bailing due to parse errors" )
1067+ self .errors .raise_error ()
1068+
1069+ for state in parallel_states :
1070+ assert state .tree is not None
1071+ if state in parallel_parsed_states_set :
1072+ if state .tree .raw_data is not None :
1073+ # source_hash was already extracted above, but raw_data
1074+ # may have been preserved for workers (imports_only=True).
1075+ pass
1076+ elif state .source_hash is None :
1077+ # At least namespace packages may not have source.
1078+ state .get_source ()
1079+ state .size_hint = os .path .getsize (state .xpath )
1080+ state .early_errors = list (self .errors .error_info_map .get (state .xpath , []))
1081+ state .semantic_analysis_pass1 ()
1082+ self .ast_cache [state .id ] = (state .tree , state .early_errors , state .source_hash )
1083+ self .modules [state .id ] = state .tree
1084+ state .check_blockers ()
1085+ state .setup_errors ()
1086+
1087+ def parse_files_threaded_raw (
1088+ self , sequential_states : list [State ], parallel_states : list [State ]
1089+ ) -> tuple [list [State ], set [State ]]:
1090+ """Parse files using a thread pool.
1091+
1092+ Also parse sequential states while waiting for the parallel results.
1093+ Trees from the new parser are left in raw (serialized) form.
1094+
1095+ Return (list, set) of states that were actually parsed (not cached).
1096+ """
10421097 futures = []
10431098 # Use both list and a set to have more predictable order of errors,
10441099 # while also not sacrificing performance.
1045- parallel_parsed_states = []
1046- parallel_parsed_states_set = set ()
1100+ parallel_parsed_states : list [ State ] = []
1101+ parallel_parsed_states_set : set [ State ] = set ()
10471102 # Use at least --num-workers if specified by user.
10481103 available_threads = max (get_available_threads (), self .options .num_workers )
10491104 # Overhead from trying to parallelize (small) blocking portion of
@@ -1052,53 +1107,27 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
10521107 with ThreadPoolExecutor (max_workers = min (available_threads , 8 )) as executor :
10531108 for state in parallel_states :
10541109 state .needs_parse = False
1055- # New parser reads source from file directly, we do this only for
1056- # the side effect of parsing inline mypy configurations.
1057- state .get_source ()
10581110 if state .id not in self .ast_cache :
10591111 self .log (f"Parsing { state .xpath } ({ state .id } )" )
10601112 ignore_errors = state .ignore_all or state .options .ignore_errors
10611113 if ignore_errors :
10621114 self .errors .ignored_files .add (state .xpath )
1063- futures .append (executor .submit (state .parse_file_inner , state . source or "" ))
1115+ futures .append (executor .submit (state .parse_file_inner , "" ))
10641116 parallel_parsed_states .append (state )
10651117 parallel_parsed_states_set .add (state )
10661118 else :
10671119 self .log (f"Using cached AST for { state .xpath } ({ state .id } )" )
1068- state .tree , state .early_errors = self .ast_cache [state .id ]
1120+ state .tree , state .early_errors , source_hash = self .ast_cache [state .id ]
1121+ state .source_hash = source_hash
10691122
10701123 # Parse sequential before waiting on parallel.
10711124 for state in sequential_states :
10721125 state .parse_file ()
10731126
10741127 for fut in wait (futures ).done :
10751128 fut .result ()
1076- for state in parallel_parsed_states :
1077- # New parser returns serialized trees that need to be de-serialized.
1078- with state .wrap_context ():
1079- assert state .tree is not None
1080- if state .tree .raw_data :
1081- state .tree = load_from_raw (
1082- state .xpath ,
1083- state .id ,
1084- state .tree .raw_data ,
1085- self .errors ,
1086- state .options ,
1087- imports_only = bool (self .workers ),
1088- )
1089- if self .errors .is_blockers ():
1090- self .log ("Bailing due to parse errors" )
1091- self .errors .raise_error ()
10921129
1093- for state in parallel_states :
1094- assert state .tree is not None
1095- if state in parallel_parsed_states_set :
1096- state .early_errors = list (self .errors .error_info_map .get (state .xpath , []))
1097- state .semantic_analysis_pass1 ()
1098- self .ast_cache [state .id ] = (state .tree , state .early_errors )
1099- self .modules [state .id ] = state .tree
1100- state .check_blockers ()
1101- state .setup_errors ()
1130+ return parallel_parsed_states , parallel_parsed_states_set
11021131
11031132 def post_parse_all (self , states : list [State ]) -> None :
11041133 for state in states :
@@ -3090,7 +3119,6 @@ def get_source(self) -> str:
30903119 self .source_hash = compute_hash (source )
30913120
30923121 self .parse_inline_configuration (source )
3093- self .check_for_invalid_options ()
30943122
30953123 self .size_hint = len (source )
30963124 self .time_spent_us += time_spent_us (t0 )
@@ -3115,7 +3143,10 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None =
31153143 # The file was already parsed.
31163144 return
31173145
3118- source = self .get_source ()
3146+ if raw_data is None :
3147+ source = self .get_source ()
3148+ else :
3149+ source = ""
31193150 manager = self .manager
31203151 # Can we reuse a previously parsed AST? This avoids redundant work in daemon.
31213152 if self .id not in manager .ast_cache :
@@ -3125,6 +3156,12 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None =
31253156 self .manager .errors .ignored_files .add (self .xpath )
31263157 with self .wrap_context ():
31273158 manager .errors .set_file (self .xpath , self .id , options = self .options )
3159+ if raw_data is not None :
3160+ # Apply inline mypy config before deserialization, since
3161+ # some options (e.g. implicit_optional) affect how the
3162+ # AST is built during deserialization.
3163+ self .source_hash = raw_data .source_hash
3164+ self .apply_inline_configuration (raw_data .mypy_comments )
31283165 self .parse_file_inner (source , raw_data )
31293166 assert self .tree is not None
31303167 # New parser returns serialized trees that need to be de-serialized.
@@ -3149,14 +3186,15 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None =
31493186 else :
31503187 # Reuse a cached AST
31513188 manager .log (f"Using cached AST for { self .xpath } ({ self .id } )" )
3152- self .tree , self .early_errors = manager .ast_cache [self .id ]
3189+ self .tree , self .early_errors , source_hash = manager .ast_cache [self .id ]
3190+ self .source_hash = source_hash
31533191
31543192 assert self .tree is not None
31553193 if not temporary :
31563194 manager .modules [self .id ] = self .tree
31573195 self .check_blockers ()
31583196
3159- manager .ast_cache [self .id ] = (self .tree , self .early_errors )
3197+ manager .ast_cache [self .id ] = (self .tree , self .early_errors , self . source_hash )
31603198 self .setup_errors ()
31613199
31623200 def setup_errors (self ) -> None :
@@ -3169,12 +3207,17 @@ def setup_errors(self) -> None:
def parse_inline_configuration(self, source: str) -> None:
    """Check for inline mypy: options directive and parse them.

    The inline configuration comments are extracted from ``source`` with
    ``get_mypy_comments()`` and then handed to
    ``apply_inline_configuration()``, which updates ``self.options`` and
    reports any configuration errors.
    """
    flags = get_mypy_comments(source)
    self.apply_inline_configuration(flags)
3211+
def apply_inline_configuration(self, flags: list[tuple[int, str]] | None) -> None:
    """Apply inline mypy configuration comments and check for invalid options.

    ``flags`` is the list of inline configuration comments, either freshly
    extracted from source text or taken from already-parsed raw data
    (presumably (line number, comment text) pairs -- confirm against
    ``get_mypy_comments()``). ``None`` or an empty list means the file has
    no inline configuration; in that case ``self.options`` is left as is.

    Configuration errors returned by ``parse_mypy_comments()`` are reported
    through ``self.manager.error()`` at their line numbers.
    """
    if flags:
        changes, config_errors = parse_mypy_comments(flags, self.options)
        self.options = self.options.apply_changes(changes)
        # Point the error reporter at this file again, now with the
        # per-file options that include the inline changes.
        self.manager.errors.set_file(self.xpath, self.id, self.options)
        for lineno, error in config_errors:
            self.manager.error(lineno, error)
    # Validate the effective options even when there were no inline flags:
    # this check used to run unconditionally in get_source() right after the
    # inline configuration was parsed.
    self.check_for_invalid_options()
31783221
31793222 def check_for_invalid_options (self ) -> None :
31803223 if self .options .mypyc and not self .options .strict_bytes :
0 commit comments