|
15 | 15 | * Gitignore matches against text.
|
16 | 16 | *
|
17 | 17 | * @author Ahmed Abdou <[email protected]>
|
| 18 | + * @author Michael Voříšek <[email protected]> |
18 | 19 | */
|
19 | 20 | class Gitignore
|
20 | 21 | {
|
21 | 22 | /**
|
22 | 23 | * Returns a regexp which is the equivalent of the gitignore pattern.
|
23 | 24 | *
|
| 25 | + * Format specification: https://git-scm.com/docs/gitignore#_pattern_format |
| 26 | + * |
24 | 27 | * @return string The regexp
|
25 | 28 | */
|
26 | 29 | public static function toRegex(string $gitignoreFileContent): string
|
27 | 30 | {
|
28 |
| - $gitignoreFileContent = preg_replace('/^[^\\\r\n]*#.*/m', '', $gitignoreFileContent); |
29 |
| - $gitignoreLines = preg_split('/\r\n|\r|\n/', $gitignoreFileContent); |
| 31 | + $gitignoreFileContent = preg_replace('~(?<!\\\\)#[^\n\r]*~', '', $gitignoreFileContent); |
| 32 | + $gitignoreLines = preg_split('~\r\n?|\n~', $gitignoreFileContent); |
30 | 33 |
|
31 |
| - $positives = []; |
32 |
| - $negatives = []; |
| 34 | + $res = self::lineToRegex(''); |
33 | 35 | foreach ($gitignoreLines as $i => $line) {
|
34 |
| - $line = trim($line); |
35 |
| - if ('' === $line) { |
36 |
| - continue; |
| 36 | + if (\strlen($line) < 2 || '\\' !== substr($line, -2, 1)) { |
| 37 | + $line = rtrim($line); |
37 | 38 | }
|
38 | 39 |
|
39 |
| - if (1 === preg_match('/^!/', $line)) { |
40 |
| - $positives[$i] = null; |
41 |
| - $negatives[$i] = self::getRegexFromGitignore(preg_replace('/^!(.*)/', '${1}', $line), true); |
42 |
| - |
43 |
| - continue; |
| 40 | + if (0 === strpos($line, '!')) { |
| 41 | + $line = substr($line, 1); |
| 42 | + if ('' !== $line) { |
| 43 | + $res = '(?!'.self::lineToRegex($line).')'.$res; |
| 44 | + } |
| 45 | + } elseif ('' !== $line) { |
| 46 | + $res = '(?:'.$res.'|'.self::lineToRegex($line).')'; |
44 | 47 | }
|
45 |
| - $negatives[$i] = null; |
46 |
| - $positives[$i] = self::getRegexFromGitignore($line); |
47 | 48 | }
|
48 | 49 |
|
49 |
| - $index = 0; |
50 |
| - $patterns = []; |
51 |
| - foreach ($positives as $pattern) { |
52 |
| - if (null === $pattern) { |
53 |
| - continue; |
54 |
| - } |
55 |
| - |
56 |
| - $negativesAfter = array_filter(\array_slice($negatives, ++$index)); |
57 |
| - if ([] !== $negativesAfter) { |
58 |
| - $pattern .= sprintf('(?<!%s)', implode('|', $negativesAfter)); |
59 |
| - } |
60 |
| - |
61 |
| - $patterns[] = $pattern; |
62 |
| - } |
63 |
| - |
64 |
| - return sprintf('/^((%s))$/', implode(')|(', $patterns)); |
| 50 | + return '~^(?:'.$res.')~s'; |
65 | 51 | }
|
66 | 52 |
|
67 |
| - private static function getRegexFromGitignore(string $gitignorePattern, bool $negative = false): string |
| 53 | + private static function lineToRegex(string $gitignoreLine): string |
68 | 54 | {
|
69 |
| - $regex = ''; |
70 |
| - $isRelativePath = false; |
71 |
| - // If there is a separator at the beginning or middle (or both) of the pattern, then the pattern is relative to the directory level of the particular .gitignore file itself |
72 |
| - $slashPosition = strpos($gitignorePattern, '/'); |
73 |
| - if (false !== $slashPosition && \strlen($gitignorePattern) - 1 !== $slashPosition) { |
74 |
| - if (0 === $slashPosition) { |
75 |
| - $gitignorePattern = substr($gitignorePattern, 1); |
76 |
| - } |
77 |
| - |
78 |
| - $isRelativePath = true; |
79 |
| - $regex .= '^'; |
80 |
| - } |
81 |
| - |
82 |
| - if ('/' === $gitignorePattern[\strlen($gitignorePattern) - 1]) { |
83 |
| - $gitignorePattern = substr($gitignorePattern, 0, -1); |
| 55 | + if ('' === $gitignoreLine) { |
| 56 | + return '$f'; // always false |
84 | 57 | }
|
85 | 58 |
|
86 |
| - $iMax = \strlen($gitignorePattern); |
87 |
| - for ($i = 0; $i < $iMax; ++$i) { |
88 |
| - $tripleChars = substr($gitignorePattern, $i, 3); |
89 |
| - if ('**/' === $tripleChars || '/**' === $tripleChars) { |
90 |
| - $regex .= '.*'; |
91 |
| - $i += 2; |
92 |
| - continue; |
93 |
| - } |
94 |
| - |
95 |
| - $doubleChars = substr($gitignorePattern, $i, 2); |
96 |
| - if ('**' === $doubleChars) { |
97 |
| - $regex .= '.*'; |
98 |
| - ++$i; |
99 |
| - continue; |
| 59 | + $slashPos = strpos($gitignoreLine, '/'); |
| 60 | + if (false !== $slashPos && \strlen($gitignoreLine) - 1 !== $slashPos) { |
| 61 | + if (0 === $slashPos) { |
| 62 | + $gitignoreLine = substr($gitignoreLine, 1); |
100 | 63 | }
|
101 |
| - if ('*/' === $doubleChars) { |
102 |
| - $regex .= '[^\/]*\/?[^\/]*'; |
103 |
| - ++$i; |
104 |
| - continue; |
105 |
| - } |
106 |
| - |
107 |
| - $c = $gitignorePattern[$i]; |
108 |
| - switch ($c) { |
109 |
| - case '*': |
110 |
| - $regex .= $isRelativePath ? '[^\/]*' : '[^\/]*\/?[^\/]*'; |
111 |
| - break; |
112 |
| - case '/': |
113 |
| - case '.': |
114 |
| - case ':': |
115 |
| - case '(': |
116 |
| - case ')': |
117 |
| - case '{': |
118 |
| - case '}': |
119 |
| - $regex .= '\\'.$c; |
120 |
| - break; |
121 |
| - default: |
122 |
| - $regex .= $c; |
123 |
| - } |
124 |
| - } |
125 | 64 |
|
126 |
| - if ($negative) { |
127 |
| - // a lookbehind assertion has to be a fixed width (it can not have nested '|' statements) |
128 |
| - return sprintf('%s$|%s\/$', $regex, $regex); |
| 65 | + $isAbsolute = true; |
| 66 | + } else { |
| 67 | + $isAbsolute = false; |
129 | 68 | }
|
130 | 69 |
|
131 |
| - return '(?>'.$regex.'($|\/.*))'; |
| 70 | + $parts = array_map(function (string $v): string { |
| 71 | + $v = preg_quote(str_replace('\\', '', $v), '~'); |
| 72 | + $v = preg_replace_callback('~\\\\\[([^\[\]]*)\\\\\]~', function (array $matches): string { |
| 73 | + return '['.str_replace('\\-', '-', $matches[1]).']'; |
| 74 | + }, $v); |
| 75 | + $v = preg_replace('~\\\\\*\\\\\*~', '[^/]+(?:/[^/]+)*', $v); |
| 76 | + $v = preg_replace('~\\\\\*~', '[^/]*', $v); |
| 77 | + $v = preg_replace('~\\\\\?~', '[^/]', $v); |
| 78 | + |
| 79 | + return $v; |
| 80 | + }, explode('/', $gitignoreLine)); |
| 81 | + |
| 82 | + return '(?:' |
| 83 | + .($isAbsolute ? '' : '(?:[^/]+/)*') |
| 84 | + .implode('/', $parts) |
| 85 | + .('' !== end($parts) ? '(?:$|/)' : '') |
| 86 | + .')'; |
132 | 87 | }
|
133 | 88 | }
|
0 commit comments