@@ -1039,11 +1039,66 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
10391039 as an optimization to parallelize only those parts of the code that can be
10401040 parallelized efficiently.
10411041 """
1042+ parallel_parsed_states , parallel_parsed_states_set = self .parse_files_threaded_raw (
1043+ sequential_states , parallel_states
1044+ )
1045+
1046+ for state in parallel_parsed_states :
1047+ # New parser returns serialized ASTs. Deserialize full trees only if not using
1048+ # parallel workers.
1049+ with state .wrap_context ():
1050+ assert state .tree is not None
1051+ raw_data = state .tree .raw_data
1052+ if raw_data is not None :
1053+ # Apply inline mypy config before deserialization, since
1054+ # some options (e.g. implicit_optional) affect deserialization
1055+ state .source_hash = raw_data .source_hash
1056+ state .apply_inline_configuration (raw_data .mypy_comments )
1057+ state .tree = load_from_raw (
1058+ state .xpath ,
1059+ state .id ,
1060+ raw_data ,
1061+ self .errors ,
1062+ state .options ,
1063+ imports_only = bool (self .workers ),
1064+ )
1065+ if self .errors .is_blockers ():
1066+ self .log ("Bailing due to parse errors" )
1067+ self .errors .raise_error ()
1068+
1069+ for state in parallel_states :
1070+ assert state .tree is not None
1071+ if state in parallel_parsed_states_set :
1072+ if state .tree .raw_data is not None :
1073+ # source_hash was already extracted above, but raw_data
1074+ # may have been preserved for workers (imports_only=True).
1075+ pass
1076+ elif state .source_hash is None :
1077+ # At least namespace packages may not have source.
1078+ state .get_source ()
1079+ state .size_hint = os .path .getsize (state .xpath )
1080+ state .early_errors = list (self .errors .error_info_map .get (state .xpath , []))
1081+ state .semantic_analysis_pass1 ()
1082+ self .ast_cache [state .id ] = (state .tree , state .early_errors , state .source_hash )
1083+ self .modules [state .id ] = state .tree
1084+ state .check_blockers ()
1085+ state .setup_errors ()
1086+
1087+ def parse_files_threaded_raw (
1088+ self , sequential_states : list [State ], parallel_states : list [State ]
1089+ ) -> tuple [list [State ], set [State ]]:
1090+ """Parse files using a thread pool.
1091+
1092+ Also parse sequential states while waiting for the parallel results.
1093+ Trees from the new parser are left in raw (serialized) form.
1094+
1095+ Return (list, set) of states that were actually parsed (not cached).
1096+ """
10421097 futures = []
10431098 # Use both list and a set to have more predictable order of errors,
10441099 # while also not sacrificing performance.
1045- parallel_parsed_states = []
1046- parallel_parsed_states_set = set ()
1100+ parallel_parsed_states : list [ State ] = []
1101+ parallel_parsed_states_set : set [ State ] = set ()
10471102 # Use at least --num-workers if specified by user.
10481103 available_threads = max (get_available_threads (), self .options .num_workers )
10491104 # Overhead from trying to parallelize (small) blocking portion of
@@ -1071,45 +1126,8 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
10711126
10721127 for fut in wait (futures ).done :
10731128 fut .result ()
1074- for state in parallel_parsed_states :
1075- # New parser returns serialized trees that need to be de-serialized.
1076- with state .wrap_context ():
1077- assert state .tree is not None
1078- raw_data = state .tree .raw_data
1079- if raw_data is not None :
1080- # Apply inline mypy config before deserialization, since
1081- # some options (e.g. implicit_optional) affect deserialization
1082- state .source_hash = raw_data .source_hash
1083- state .apply_inline_configuration (raw_data .mypy_comments )
1084- state .tree = load_from_raw (
1085- state .xpath ,
1086- state .id ,
1087- raw_data ,
1088- self .errors ,
1089- state .options ,
1090- imports_only = bool (self .workers ),
1091- )
1092- if self .errors .is_blockers ():
1093- self .log ("Bailing due to parse errors" )
1094- self .errors .raise_error ()
10951129
1096- for state in parallel_states :
1097- assert state .tree is not None
1098- if state in parallel_parsed_states_set :
1099- if state .tree .raw_data is not None :
1100- # source_hash was already extracted above, but raw_data
1101- # may have been preserved for workers (imports_only=True).
1102- pass
1103- elif state .source_hash is None :
1104- # At least namespace packages may not have source.
1105- state .get_source ()
1106- state .size_hint = os .path .getsize (state .xpath )
1107- state .early_errors = list (self .errors .error_info_map .get (state .xpath , []))
1108- state .semantic_analysis_pass1 ()
1109- self .ast_cache [state .id ] = (state .tree , state .early_errors , state .source_hash )
1110- self .modules [state .id ] = state .tree
1111- state .check_blockers ()
1112- state .setup_errors ()
1130+ return parallel_parsed_states , parallel_parsed_states_set
11131131
11141132 def post_parse_all (self , states : list [State ]) -> None :
11151133 for state in states :
0 commit comments