From a46f196c0ea955354e71c242d2ca5039a726989a Mon Sep 17 00:00:00 2001 From: Lars Thorsen Date: Tue, 18 Feb 2025 08:32:12 +0100 Subject: [PATCH] Fix a tail recursion bug in sax parser - Rewrote some recursion when calling continuation fun in xmerl_sax_parser_base. - Remove all old style catch calls in xmerl --- lib/xmerl/src/xmerl_eventp.erl | 19 +- lib/xmerl/src/xmerl_regexp.erl | 11 +- lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 303 +++++++++------------ lib/xmerl/src/xmerl_scan.erl | 31 +-- lib/xmerl/src/xmerl_simple.erl | 9 +- lib/xmerl/src/xmerl_validate.erl | 27 +- lib/xmerl/src/xmerl_xpath_pred.erl | 42 +-- lib/xmerl/src/xmerl_xsd.erl | 199 +++++++++----- lib/xmerl/src/xmerl_xsd_type.erl | 79 ++++-- 9 files changed, 393 insertions(+), 327 deletions(-) diff --git a/lib/xmerl/src/xmerl_eventp.erl b/lib/xmerl/src/xmerl_eventp.erl index 4f259370bf43..85cdad053567 100644 --- a/lib/xmerl/src/xmerl_eventp.erl +++ b/lib/xmerl/src/xmerl_eventp.erl @@ -314,16 +314,16 @@ cont(F, Exception, S) -> cont2(F, Exception, Sofar, Fd, Fname, T, S) -> - case catch read_chunk(Fd, Fname, Sofar) of - {ok, Bin} -> - find_good_split(list_to_binary([Sofar,Bin]), + case read_chunk(Fd, Fname, Sofar) of + {ok, Bin} -> + find_good_split(list_to_binary([Sofar,Bin]), F,Exception,Fd,Fname,T,S); eof -> ok = file:close(Fd), NewS = xmerl_scan:cont_state([{Fname, eof}|T], S), F(binary_to_list(Sofar), NewS); - Error -> - exit(Error) + Error -> + exit(Error) end. read_chunk(Fd, _Fname, _Sofar) -> @@ -468,11 +468,12 @@ rules_read(Context, Name, #xmerl_scanner{rules = T}) -> %%% Generic helper functions scanner_options([H|T], Opts) -> - case catch keyreplace(H, 1, Opts) of - false -> - scanner_options(T, [H|Opts]); - NewOpts -> + try keyreplace(H, 1, Opts) of + NewOpts -> scanner_options(T, NewOpts) + catch + throw:false -> + scanner_options(T, [H|Opts]) end; scanner_options([], Opts) -> Opts. diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl index 8b91f973aaa2..2df4fa45830b 100644 --- a/lib/xmerl/src/xmerl_regexp.erl +++ b/lib/xmerl/src/xmerl_regexp.erl @@ -241,10 +241,13 @@ sh_special_char(_C) -> false. %% Parse the regexp described in the string RegExp. parse(S) -> - case catch reg(S, 0) of - {R,Sc,[]} -> {ok,{regexp,{R,Sc}}}; - {_R,_Sc,[C|_]} -> {error,{illegal,[C]}}; - {error,E} -> {error,E} + try reg(S, 0) of + {R,Sc,[]} -> + {ok,{regexp,{R,Sc}}}; + {_R,_Sc,[C|_]} -> + {error,{illegal,[C]}} + catch + throw:{error,E} -> {error,E} end. %% format_error(Error) -> String. diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc index 9c38b3f4074c..3b98218f7ad3 100644 --- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc +++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc @@ -77,29 +77,6 @@ parse(Xml, State) -> handle_end_document({other, OtherError, State}) end. - % case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of - % {ok, Rest, State2} -> - % State3 = event_callback(endDocument, State2), - % case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of - % true -> - % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - % false -> - % format_error(fatal_error, State3, "Input found after legal document") - % end; - % {fatal_error, {State2, Reason}} -> - % State3 = event_callback(endDocument, State2), - % format_error(fatal_error, State3, Reason); - % {event_receiver_error, State2, {Tag, Reason}} -> - % State3 = event_callback(endDocument, State2), - % format_error(Tag, State3, Reason); - % {endDocument, Rest, State2} -> - % State3 = event_callback(endDocument, State2), - % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - % Other -> - % _State2 = event_callback(endDocument, State1), - % {fatal_error, Other} - % end. - %%---------------------------------------------------------------------- %% Function: parse_dtd(Xml, State) -> Result %% Input: Xml = string() | binary() @@ -122,26 +99,6 @@ parse_dtd(Xml, State) -> handle_end_document({other, OtherError, State}) end. - - % case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of - % {fatal_error, {State2, Reason}} -> - % State3 = event_callback(endDocument, State2), - % format_error(fatal_error, State3, Reason); - % {event_receiver_error, State2, {Tag, Reason}} -> - % State3 = event_callback(endDocument, State2), - % format_error(Tag, State3, Reason); - % {Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> - % State3 = event_callback(endDocument, State2), - % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - % {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) -> - % State3 = event_callback(endDocument, State2), - % {ok, State3#xmerl_sax_parser_state.event_state, Rest}; - % Other -> - % _State2 = event_callback(endDocument, State1), - % {fatal_error, Other} - % end. - - %%====================================================================== %% Internal functions %%====================================================================== @@ -1311,36 +1268,30 @@ parse_etag_1(Bytes, State, Tag) -> %% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* %%---------------------------------------------------------------------- parse_content(?STRING_EMPTY, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> - case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of - {fatal_error, {State1, "No more bytes"}} when ET == [] -> + try cf(?STRING_EMPTY, State) of + {NewBytes, NewState} -> + parse_content(NewBytes, NewState, Acc, IgnorableWS) + catch + throw:{fatal_error, {State1, "No more bytes"}} when ET == [] -> State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), {?STRING_EMPTY, State2}; - {fatal_error, {State1, "Continuation function undefined"}} when ET == [] -> + throw:{fatal_error, {State1, "Continuation function undefined"}} when ET == [] -> State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1), - {?STRING_EMPTY, State2}; - {fatal_error, {State1, Msg}} -> - ?fatal_error(State1, Msg); - {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State1}; - Other -> - throw(Other) + {?STRING_EMPTY, State2} end; parse_content(?STRING("\r") = Bytes, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) -> - case catch cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4) of - {fatal_error, {State1, "No more bytes"}} when ET == [] -> + try cf(Bytes, State) of + {NewBytes, NewState} -> + parse_content(NewBytes, NewState, Acc, IgnorableWS) + catch + throw:{fatal_error, {State1, "No more bytes"}} when ET == [] -> Acc1 = [?lf |Acc], State2 = send_character_event(length(Acc1), IgnorableWS, lists:reverse(Acc1), State1), {?STRING_EMPTY, State2}; - {fatal_error, {State1, "Continuation function undefined"}} when ET == [] -> + throw:{fatal_error, {State1, "Continuation function undefined"}} when ET == [] -> Acc1 = [?lf |Acc], State2 = send_character_event(length(Acc1), IgnorableWS, lists:reverse(Acc1), State1), - {?STRING_EMPTY, State2}; - {fatal_error, {State1, Msg}} -> - ?fatal_error(State1, Msg); - {Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Rest, State1}; - Other -> - throw(Other) + {?STRING_EMPTY, State2} end; parse_content(?STRING("<") = Bytes, State, Acc, IgnorableWS) -> cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4); @@ -1472,15 +1423,14 @@ parse_content(Bytes, State, Acc, IgnorableWS) -> parse_entity_content(Bytes, #xmerl_sax_parser_state{file_type = text} = State, Acc, _IgnorableWS) -> parse_entity_content_1(Bytes, State, Acc); parse_entity_content(?STRING_EMPTY, State, Acc, IgnorableWS) -> - case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_entity_content/4) of - {Acc1, Rest, State1} when is_record(State1, xmerl_sax_parser_state) -> - {Acc1, Rest, State1}; - {fatal_error, {State1, "No more bytes"}} -> - {Acc, ?STRING_EMPTY, State1}; - {fatal_error, {State1, "Continuation function undefined"}} -> + try cf(?STRING_EMPTY, State) of + {NewBytes, NewState} -> + parse_entity_content(NewBytes, NewState, Acc, IgnorableWS) + catch + throw:{fatal_error, {State1, "No more bytes"}} -> {Acc, ?STRING_EMPTY, State1}; - {fatal_error, {State1, Message}} -> - ?fatal_error(State1, Message) + throw:{fatal_error, {State1, "Continuation function undefined"}} -> + {Acc, ?STRING_EMPTY, State1} end; parse_entity_content(?STRING("<") = Bytes, State, Acc, IgnorableWS) -> cf(Bytes, State, Acc, IgnorableWS, fun parse_entity_content/4); @@ -1490,13 +1440,12 @@ parse_entity_content(?STRING(" cf(Bytes, State, Acc, IgnorableWS, fun parse_entity_content/4); parse_entity_content(?STRING_REST("