@@ -25,8 +25,8 @@ struct Regex<Output> : RegexComponent {
2525
2626 init ( _ pattern: String ) throws where Output == AnyRegexOutput { }
2727
28- func ignoresCase( _ ignoresCase: Bool = true ) -> Regex < Regex < Output > . RegexOutput > { return self }
29- func dotMatchesNewlines( _ dotMatchesNewlines: Bool = true ) -> Regex < Regex < Output > . RegexOutput > { return self }
28+ func ignoresCase( _ ignoresCase: Bool = true ) -> Regex < Regex < Output > . RegexOutput > { return self }
29+ func dotMatchesNewlines( _ dotMatchesNewlines: Bool = true ) -> Regex < Regex < Output > . RegexOutput > { return self }
3030
3131 func firstMatch( in string: String ) throws -> Regex < Output > . Match ? { return nil }
3232
@@ -56,7 +56,8 @@ class NSRegularExpression : NSObject {
5656 struct Options : OptionSet {
5757 var rawValue : UInt
5858
59- static var caseInsensitive : NSRegularExpression . Options { get { return Options ( rawValue: 1 ) } }
59+ static var caseInsensitive : NSRegularExpression . Options { get { return Options ( rawValue: 1 << 0 ) } }
60+ static var dotMatchesLineSeparators : NSRegularExpression . Options { get { return Options ( rawValue: 1 << 1 ) } }
6061 }
6162
6263 struct MatchingOptions : OptionSet {
@@ -111,62 +112,106 @@ func myRegexpVariantsTests(myUrl: URL) throws {
111112 _ = try re9. firstMatch ( in: tainted)
112113
113114 // BAD - does not match double quotes for attribute values
114- let re10 = try Regex ( #"<script(\s|\w|=|')*?>.*?<\/script[^>]*>"# ) . ignoresCase ( true ) . dotMatchesNewlines ( true )
115- _ = try re10. firstMatch ( in: tainted)
115+ let re10a = try Regex ( #"(?is)<script(\s|\w|=|')*?>.*?<\/script[^>]*>"# )
116+ _ = try re10a. firstMatch ( in: tainted)
117+ // BAD - does not match double quotes for attribute values
118+ let re10b = try Regex ( #"<script(\s|\w|=|')*?>.*?<\/script[^>]*>"# ) . ignoresCase ( true ) . dotMatchesNewlines ( true )
119+ _ = try re10b. firstMatch ( in: tainted)
120+ // BAD - does not match double quotes for attribute values
121+ let options10 : NSRegularExpression . Options = [ . caseInsensitive, . dotMatchesLineSeparators]
122+ let ns10 = try NSRegularExpression ( pattern: #"<script(\s|\w|=|')*?>.*?<\/script[^>]*>"# , options: options10)
123+ _ = ns10. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
116124
117125 // BAD - does not match tabs between attributes
118- let re11 = try Regex ( #"<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>"# ) . ignoresCase ( true ) . dotMatchesNewlines ( true )
119- _ = try re11. firstMatch ( in: tainted)
126+ let re11a = try Regex ( #"(?is)<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>"# )
127+ _ = try re11a. firstMatch ( in: tainted)
128+ // BAD - does not match tabs between attributes
129+ let re11b = try Regex ( #"<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>"# ) . ignoresCase ( true ) . dotMatchesNewlines ( true )
130+ _ = try re11b. firstMatch ( in: tainted)
131+ // BAD - does not match tabs between attributes
132+ let options11 : NSRegularExpression . Options = [ . caseInsensitive, . dotMatchesLineSeparators]
133+ let ns11 = try NSRegularExpression ( pattern: #"<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>"# , options: options11)
134+ _ = ns11. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
120135
121136 // BAD - does not match uppercase SCRIPT tags
122- let re12 = try Regex ( #"<script.*?>.*?<\/script[^>]*>"# ) . dotMatchesNewlines ( true )
123- _ = try re12. firstMatch ( in: tainted)
137+ let re12a = try Regex ( #"(?s)<script.*?>.*?<\/script[^>]*>"# )
138+ _ = try re12a. firstMatch ( in: tainted)
139+ // BAD - does not match uppercase SCRIPT tags
140+ let re12b = try Regex ( #"<script.*?>.*?<\/script[^>]*>"# ) . dotMatchesNewlines ( true )
141+ _ = try re12b. firstMatch ( in: tainted)
142+ // BAD - does not match uppercase SCRIPT tags
143+ let ns12 = try NSRegularExpression ( pattern: #"<script.*?>.*?<\/script[^>]*>"# , options: . dotMatchesLineSeparators)
144+ _ = ns12. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
124145
125146 // BAD - does not match mixed case script tags
126- let re13 = try Regex ( #"<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>"# ) . dotMatchesNewlines ( true )
127- _ = try re13. firstMatch ( in: tainted)
147+ let re13a = try Regex ( #"(?s)<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>"# )
148+ _ = try re13a. firstMatch ( in: tainted)
149+ // BAD - does not match mixed case script tags
150+ let re13b = try Regex ( #"<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>"# ) . dotMatchesNewlines ( true )
151+ _ = try re13b. firstMatch ( in: tainted)
152+ // BAD - does not match mixed case script tags
153+ let ns13 = try NSRegularExpression ( pattern: #"<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>"# , options: . dotMatchesLineSeparators)
154+ _ = ns13. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
128155
129156 // BAD - doesn't match newlines in the end tag
130- let re14 = try Regex ( #"<script[^>]*?>[\s\S]*?<\/script.*>"# ) . ignoresCase ( true )
131- _ = try re14. firstMatch ( in: tainted)
157+ let re14a = try Regex ( #"(?i)<script[^>]*?>[\s\S]*?<\/script.*>"# )
158+ _ = try re14a. firstMatch ( in: tainted)
159+ // BAD - doesn't match newlines in the end tag
160+ let re14b = try Regex ( #"<script[^>]*?>[\s\S]*?<\/script.*>"# ) . ignoresCase ( true )
161+ _ = try re14b. firstMatch ( in: tainted)
162+ // BAD - doesn't match newlines in the end tag
163+ let ns14 = try NSRegularExpression ( pattern: #"<script[^>]*?>[\s\S]*?<\/script.*>"# , options: . caseInsensitive)
164+ _ = ns14. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
132165
133166 // GOOD
134- let re15 = try Regex ( #"<script[^>]*?>[\s\S]*?<\/script[^>]*?>"# ) . ignoresCase ( true )
135- _ = try re15. firstMatch ( in: tainted)
167+ let re15a = try Regex ( #"(?i)<script[^>]*?>[\s\S]*?<\/script[^>]*?>"# )
168+ _ = try re15a. firstMatch ( in: tainted)
169+ // GOOD
170+ let re15b = try Regex ( #"<script[^>]*?>[\s\S]*?<\/script[^>]*?>"# ) . ignoresCase ( true )
171+ _ = try re15b. firstMatch ( in: tainted)
172+ // GOOD
173+ let ns15 = try NSRegularExpression ( pattern: #"<script[^>]*?>[\s\S]*?<\/script[^>]*?>"# , options: . caseInsensitive)
174+ _ = ns15. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
136175
137176 // BAD - doesn't match comments with the right capture groups
138177 let re16 = try Regex ( #"<(?:!--([\S|\s]*?)-->)|([^\/\s>]+)[\S\s]*?>"# )
139178 _ = try re16. firstMatch ( in: tainted)
179+ // BAD - doesn't match comments with the right capture groups
180+ let ns16 = try NSRegularExpression ( pattern: #"<(?:!--([\S|\s]*?)-->)|([^\/\s>]+)[\S\s]*?>"# )
181+ _ = ns16. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
140182
141183 // BAD - capture groups
142184 let re17 = try Regex ( #"<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))"# )
143185 _ = try re17. firstMatch ( in: tainted)
186+ // BAD - capture groups
187+ let ns17 = try NSRegularExpression ( pattern: #"<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))"# , options: . caseInsensitive)
188+ _ = ns17. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
144189
145190 // BAD - too strict matching on the end tag
146- let ns1 = try NSRegularExpression ( pattern: #"<script\b[^>]*>([\s\S]*?)<\/script>"# , options: . caseInsensitive)
147- _ = ns1 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
191+ let ns2_1 = try NSRegularExpression ( pattern: #"<script\b[^>]*>([\s\S]*?)<\/script>"# , options: . caseInsensitive)
192+ _ = ns2_1 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
148193
149194 // BAD - capture groups
150- let ns2 = try NSRegularExpression ( pattern: #"(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)"# , options: . caseInsensitive)
151- _ = ns2 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
195+ let ns2_2 = try NSRegularExpression ( pattern: #"(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)"# , options: . caseInsensitive)
196+ _ = ns2_2 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
152197
153198 // BAD - capture groups
154- let ns3 = try NSRegularExpression ( pattern: #"<(?:(?:!--([\w\W]*?)-->)|(?:!\[CDATA\[([\w\W]*?)\]\]>)|(?:!DOCTYPE([\w\W]*?)>)|(?:\?([^\s\/<>]+) ?([\w\W]*?)[?/]>)|(?:\/([A-Za-z][A-Za-z0-9\-_\:\.]*)>)|(?:([A-Za-z][A-Za-z0-9\-_\:\.]*)((?:\s+[^"'>]+(?:(?:"[^"]*")|(?:'[^']*')|[^>]*))*|\/|\s+)>))"# )
155- _ = ns3 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
199+ let ns2_3 = try NSRegularExpression ( pattern: #"<(?:(?:!--([\w\W]*?)-->)|(?:!\[CDATA\[([\w\W]*?)\]\]>)|(?:!DOCTYPE([\w\W]*?)>)|(?:\?([^\s\/<>]+) ?([\w\W]*?)[?/]>)|(?:\/([A-Za-z][A-Za-z0-9\-_\:\.]*)>)|(?:([A-Za-z][A-Za-z0-9\-_\:\.]*)((?:\s+[^"'>]+(?:(?:"[^"]*")|(?:'[^']*')|[^>]*))*|\/|\s+)>))"# )
200+ _ = ns2_3 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
156201
157202 // BAD - capture groups
158- let ns4 = try NSRegularExpression ( pattern: #"<!--([\w\W]*?)-->|<([^>]*?)>"# )
159- _ = ns4 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
203+ let ns2_4 = try NSRegularExpression ( pattern: #"<!--([\w\W]*?)-->|<([^>]*?)>"# )
204+ _ = ns2_4 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
160205
161206 // GOOD - it's used with the ignorecase flag
162- let ns5 = try NSRegularExpression ( pattern: #"<script([^>]*)>([\\S\\s]*?)<\/script([^>]*)>"# , options: . caseInsensitive)
163- _ = ns5 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
207+ let ns2_5 = try NSRegularExpression ( pattern: #"<script([^>]*)>([\\S\\s]*?)<\/script([^>]*)>"# , options: . caseInsensitive)
208+ _ = ns2_5 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
164209
165210 // BAD - doesn't match --!>
166- let ns6 = try NSRegularExpression ( pattern: #"-->"# )
167- _ = ns6 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
211+ let ns2_6 = try NSRegularExpression ( pattern: #"-->"# )
212+ _ = ns2_6 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
168213
169214 // GOOD
170- let ns7 = try NSRegularExpression ( pattern: #"^>|^->|<!--|-->|--!>|<!-$"# )
171- _ = ns7 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
215+ let ns2_7 = try NSRegularExpression ( pattern: #"^>|^->|<!--|-->|--!>|<!-$"# )
216+ _ = ns2_7 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
172217}
0 commit comments