diff --git a/Changes b/Changes index 381da59ee270..5a24f8773d6c 100644 --- a/Changes +++ b/Changes @@ -295,7 +295,7 @@ Working version in Typecore in favor of local mutable state. (Nick Roberts, review by Takafumi Saikawa) -- #12236, #12386, #12391: Use syntax as the sole determiner of function arity +- #12236, #12386, #12391, #12496: Use syntax as the sole determiner of fun arity This changes function arity to be based solely on the source program's parsetree. Previously, the heuristic for arity had more subtle heuristics that involved type information about patterns. Function arity is important diff --git a/lambda/lambda.ml b/lambda/lambda.ml index 8c112d6bad62..ba964cd0bee0 100644 --- a/lambda/lambda.ml +++ b/lambda/lambda.ml @@ -285,6 +285,7 @@ type function_attribute = { is_a_functor: bool; stub: bool; tmc_candidate: bool; + may_fuse_arity: bool; } type scoped_location = Debuginfo.Scoped_location.t @@ -384,6 +385,15 @@ let default_function_attribute = { is_a_functor = false; stub = false; tmc_candidate = false; + (* Plain functions ([fun] and [function]) set [may_fuse_arity] to [false] so + that runtime arity matches syntactic arity in more situations. + + Many things compile to functions without having a notion of syntactic arity + that survives typechecking, e.g. functors. Multi-arg functors are compiled + as nested unary functions, and rely on the arity fusion in simplif to make + them multi-argument. So, we keep arity fusion turned on by default for now. + *) + may_fuse_arity = true; } let default_stub_attribute = diff --git a/lambda/lambda.mli b/lambda/lambda.mli index 48a57b8d7df4..4936e8816ea4 100644 --- a/lambda/lambda.mli +++ b/lambda/lambda.mli @@ -272,6 +272,11 @@ type function_attribute = { is_a_functor: bool; stub: bool; tmc_candidate: bool; + (* [simplif.ml] (in the `simplif` function within `simplify_lets`) attempts to + fuse nested functions, rewriting e.g. [fun x -> fun y -> e] to + [fun x y -> e]. This fusion is allowed only when the [may_fuse_arity] field + on *both* functions involved is [true]. *) + may_fuse_arity: bool; } type scoped_location = Debuginfo.Scoped_location.t diff --git a/lambda/simplif.ml b/lambda/simplif.ml index 479b46d6bc73..8d01fcc29120 100644 --- a/lambda/simplif.ml +++ b/lambda/simplif.ml @@ -516,19 +516,23 @@ let simplify_lets lam = end | _ -> no_opt () end - | Lfunction{kind; params; return=return1; body = l; attr; loc} -> + | Lfunction{kind; params; return=return1; body = l; attr=attr1; loc} + -> begin match simplif l with - Lfunction{kind=Curried; params=params'; return=return2; body; attr; loc} + Lfunction{kind=Curried; params=params'; return=return2; body; + attr=attr2; loc} when kind = Curried && optimize && + attr1.may_fuse_arity && attr2.may_fuse_arity && List.length params + List.length params' <= Lambda.max_arity() -> (* The return type is the type of the value returned after applying all the parameters to the function. The return type of the merged function taking [params @ params'] as parameters is the type returned after applying [params']. *) let return = return2 in - lfunction ~kind ~params:(params @ params') ~return ~body ~attr ~loc + lfunction ~kind ~params:(params @ params') ~return ~body ~attr:attr2 + ~loc | body -> - lfunction ~kind ~params ~return:return1 ~body ~attr ~loc + lfunction ~kind ~params ~return:return1 ~body ~attr:attr1 ~loc end | Llet(_str, _k, v, Lvar w, l2) when optimize -> Hashtbl.add subst v (simplif (Lvar w)); diff --git a/lambda/translclass.ml b/lambda/translclass.ml index 8db67e101b48..129157377060 100644 --- a/lambda/translclass.ml +++ b/lambda/translclass.ml @@ -31,7 +31,8 @@ let lfunction params body = if params = [] then body else match body with | Lfunction {kind = Curried; params = params'; body = body'; attr; loc} - when List.length params + List.length params' <= Lambda.max_arity() -> + when attr.may_fuse_arity && + List.length params + List.length params' <= Lambda.max_arity() -> lfunction ~kind:Curried ~params:(params @ params') ~return:Pgenval ~body:body' diff --git a/lambda/translcore.ml b/lambda/translcore.ml index e08e4a5c2ec7..c5f91ccafe81 100644 --- a/lambda/translcore.ml +++ b/lambda/translcore.ml @@ -120,6 +120,11 @@ let assert_failed loc ~scopes exp = Const_base(Const_int line); Const_base(Const_int char)]))], loc))], loc) +(* In cases where we're careful to preserve syntactic arity, we disable + the arity fusion attempted by simplif.ml *) +let function_attribute_disallowing_arity_fusion = + { default_function_attribute with may_fuse_arity = false } + let rec cut n l = if n = 0 then ([],l) else match l with [] -> failwith "Translcore.cut" @@ -524,7 +529,11 @@ and transl_exp0 ~in_new_scope ~scopes e = let fn = lfunction ~kind:Curried ~params:[Ident.create_local "param", Pgenval] ~return:Pgenval - ~attr:default_function_attribute + (* The translation of [e] may be a function, in + which case disallowing arity fusion gives a very + small performance improvement. + *) + ~attr:function_attribute_disallowing_arity_fusion ~loc:(of_location ~scopes e.exp_loc) ~body:(transl_exp ~scopes e) in Lprim(Pmakeblock(Config.lazy_tag, Mutable, None), [fn], @@ -831,7 +840,7 @@ and transl_curried_function ~scopes loc return repr params body = let body, return = List.fold_right (fun chunk (body, return) -> - let attr = default_function_attribute in + let attr = function_attribute_disallowing_arity_fusion in let loc = of_location ~scopes loc in let body = lfunction ~kind:Curried ~params:chunk ~return ~body ~attr ~loc @@ -852,7 +861,7 @@ and transl_function ~scopes e params body = let params, body = fuse_method_arity params body in transl_function_without_attributes ~scopes e.exp_loc repr params body) in - let attr = default_function_attribute in + let attr = function_attribute_disallowing_arity_fusion in let loc = of_location ~scopes e.exp_loc in let lam = lfunction ~kind ~params ~return ~body ~attr ~loc in let attrs = @@ -1175,7 +1184,7 @@ and transl_letop ~scopes loc env let_ ands param case partial = { cases = [case]; param; partial; loc = ghost_loc; exp_extra = None; attributes = []; })) in - let attr = default_function_attribute in + let attr = function_attribute_disallowing_arity_fusion in let loc = of_location ~scopes case.c_rhs.exp_loc in lfunction ~kind ~params ~return ~body ~attr ~loc in diff --git a/lambda/translmod.ml b/lambda/translmod.ml index 83e8b8cdd6e6..8e0b9ab287d3 100644 --- a/lambda/translmod.ml +++ b/lambda/translmod.ml @@ -121,7 +121,8 @@ and apply_coercion_result loc strict funct params args cc_res = ~return:Pgenval ~attr:{ default_function_attribute with is_a_functor = true; - stub = true; } + stub = true; + may_fuse_arity = true; } ~loc ~body:(apply_coercion loc Strict cc_res @@ -500,6 +501,7 @@ let rec compile_functor ~scopes mexp coercion root_path loc = is_a_functor = true; stub = false; tmc_candidate = false; + may_fuse_arity = true; } ~loc ~body diff --git a/testsuite/tests/syntactic-arity/measure_runtime_arity.ml b/testsuite/tests/syntactic-arity/measure_runtime_arity.ml new file mode 100644 index 000000000000..a9203c7213bc --- /dev/null +++ b/testsuite/tests/syntactic-arity/measure_runtime_arity.ml @@ -0,0 +1,128 @@ +(* TEST + flags = "-w +A-70"; + setup-ocamlopt.byte-build-env; + ocamlopt.byte; + run; + check-program-output; +*) + +(* Check that the runtime arity of a function (i.e., its 'fast path' for + runtime application) matches its syntactic arity (i.e., the number + of arguments appearing directly following [fun]). +*) + +(* This function will need to change if the runtime representation of closures + changes. Currently, the arity is the first 8 bits of the second field of + a closure. +*) +let extract_arity_from_closure (closure : Obj.t) : int = + assert (Obj.closure_tag = Obj.tag closure); + let clos_info = Obj.raw_field (Obj.repr closure) 1 in + Nativeint.(to_int (shift_right clos_info (Sys.word_size - 8))) + +type (_, _) raw_arity = + | One : (int -> 'ret, 'ret) raw_arity + | Succ : ('f, 'ret) raw_arity -> (int -> 'f, 'ret) raw_arity + +let rec numeric_arity : type f ret. (f, ret) raw_arity -> int = + fun arity -> + match arity with + | One -> 1 + | Succ arity -> numeric_arity arity + 1 + +let rec apply : type f ret. (f, ret) raw_arity -> f -> int -> ret = + fun arity f arg -> + match arity with + | One -> f arg + | Succ arity -> apply arity (f arg) arg + +type 'a arity = + | Tupled + | Curried : ('a, _) raw_arity -> 'a arity + +type packed_raw_arity = Packed_raw_arity : _ raw_arity -> packed_raw_arity +type packed_arity = Packed_arity : _ arity -> packed_arity + +let arity_description (type a) (arity : a arity) = + match arity with + | Tupled -> "tupled fun" + | Curried arity -> Printf.sprintf "%d-ary fun" (numeric_arity arity) + +(* [runtime_arity] depends on representation details of functions and + is subject to change. +*) +let runtime_arity (f : 'a -> 'b) : ('a -> 'b) arity = + let raw_arity = extract_arity_from_closure (Obj.repr f) in + if raw_arity < 0 then Tupled else + let rec build_arity n = + if n = 1 then Packed_raw_arity One + else + let Packed_raw_arity pred = build_arity (n-1) in + Packed_raw_arity (Succ pred) + in + let Packed_raw_arity arity = build_arity raw_arity in + (* Obj.magic is claiming that [f]'s arity matches the arity + we've constructed here. + *) + Curried (Obj.magic arity : ('a -> 'b, _) raw_arity) + +let maybe_runtime_arity (type a) (x : a) : a arity option = + let open struct + type _ is_function = + | Not_function : _ is_function + | Is_function : (_ -> _) is_function + + let is_function (type a) (x : a) = + if Obj.tag (Obj.repr x) = Obj.closure_tag + then (Obj.magic Is_function : a is_function) + else Not_function + end + in + match is_function x with + | Is_function -> Some (runtime_arity x) + | Not_function -> None + +(* The "nested arity" of a value is either: + - the empty list, if the value isn't a function + - x :: xs if the value is a function [f], where [x] is [f]'s arity, and + [xs] is the nested arity of the result of applying [f] to [x] many + values. + + "nested arity" isn't well-defined for a function that, say, returns a 2-ary + function for some inputs and a 3-ary for others. None of the functions in + this test do that. +*) +let rec nested_arity : type a. a -> packed_arity list = + fun f -> + match maybe_runtime_arity f with + | None -> [] + | Some x -> + let rest = + match x with + | Tupled -> [] + | Curried arity -> nested_arity (apply arity f 1_234) + in + Packed_arity x :: rest + +let run ~name f = + Printf.printf "%s: %s\n" name + (nested_arity f + |> List.map (fun (Packed_arity arity) -> arity_description arity) + |> String.concat " returning ") + +let () = + print_endline "Key:"; + print_endline " : "; + print_newline (); + run (fun _ _ _ -> ()) ~name:"3 params"; + run (fun _ _ -> fun _ -> ()) ~name:"2 params then 1 param"; + run (fun _ -> fun _ _ -> ()) ~name:"1 param then 2 params"; + run (fun _ -> fun _ -> fun _ -> ()) + ~name:"1 param, then 1 param, then 1 param"; + run (fun _ -> let g _ _ = () in g) + ~name:"1 param then let-bound 2 params"; + run (fun _ _ -> let g _ = () in g) + ~name:"2 params then let-bound 1 param"; + run (fun _ -> let g _ = let h _ = () in h in g) + ~name:"1 param, then let-bound 1 param, then let-bound 1 param"; +;; diff --git a/testsuite/tests/syntactic-arity/measure_runtime_arity.reference b/testsuite/tests/syntactic-arity/measure_runtime_arity.reference new file mode 100644 index 000000000000..9f1e7c56c3e8 --- /dev/null +++ b/testsuite/tests/syntactic-arity/measure_runtime_arity.reference @@ -0,0 +1,10 @@ +Key: + : + +3 params: 3-ary fun +2 params then 1 param: 2-ary fun returning 1-ary fun +1 param then 2 params: 1-ary fun returning 2-ary fun +1 param, then 1 param, then 1 param: 1-ary fun returning 1-ary fun returning 1-ary fun +1 param then let-bound 2 params: 1-ary fun returning 2-ary fun +2 params then let-bound 1 param: 2-ary fun returning 1-ary fun +1 param, then let-bound 1 param, then let-bound 1 param: 1-ary fun returning 1-ary fun returning 1-ary fun