Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d1dd81b

Browse files
author
Edward Thomson
committed
win32: use NT-prefixed "\\?\" paths
When turning UTF-8 paths into UCS-2 paths for Windows, always use the \\?\-prefixed paths. Because this bypasses the system's path canonicalization, we must perform the canonicalization ourselves. We must: (1) always use a backslash as the directory separator; (2) use only a single backslash between directories; (3) not rely on the system to translate "." and ".." in paths; and (4) remove trailing backslashes, except at the drive root (C:\).
1 parent bf4f50a commit d1dd81b

File tree

10 files changed

+591
-103
lines changed

10 files changed

+591
-103
lines changed

src/win32/findfile.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* a Linking Exception. For full terms see the included COPYING file.
66
*/
77

8+
#include "path_w32.h"
89
#include "utf-conv.h"
910
#include "path.h"
1011
#include "findfile.h"

src/win32/path_w32.c

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
/*
2+
* Copyright (C) the libgit2 contributors. All rights reserved.
3+
*
4+
* This file is part of libgit2, distributed under the GNU GPL v2 with
5+
* a Linking Exception. For full terms see the included COPYING file.
6+
*/
7+
8+
#include "common.h"
9+
#include "path.h"
10+
#include "path_w32.h"
11+
#include "utf-conv.h"
12+
13+
/* The "\\?\" prefix that selects the NT namespace, and its length. */
#define PATH__NT_NAMESPACE     L"\\\\?\\"
#define PATH__NT_NAMESPACE_LEN 4

/* Number of characters in a drive root such as "C:\". */
#define PATH__ABSOLUTE_LEN 3

/* True when `p` is either kind of directory separator. */
#define path__is_dirsep(p) ((p) == '\\' || (p) == '/')

/* True when `p` begins with a drive letter, a colon and a separator. */
#define path__is_absolute(p) \
	(git__isalpha((p)[0]) && (p)[1] == ':' && path__is_dirsep((p)[2]))

/*
 * True when `p` begins with "\\?\" or "//?/"; the three separators must
 * all be of the same kind.
 */
#define path__is_nt_namespace(p) \
	(path__is_dirsep((p)[0]) && (p)[1] == (p)[0] && \
	 (p)[2] == '?' && (p)[3] == (p)[0])

/* True when `p` begins with "\\" or "//". */
#define path__is_unc(p) \
	(path__is_dirsep((p)[0]) && (p)[1] == (p)[0])
29+
30+
/*
 * Fetch the current working directory into `path`, which has room for
 * `size` wide characters, and remove a leading "\\?\" prefix if the
 * system returned one.
 *
 * Returns the length of the resulting string in wide characters (not
 * counting the NUL terminator), or -1 with errno set: EACCES when the
 * system reports ERROR_ACCESS_DENIED, ENOENT for any other failure, and
 * ENAMETOOLONG when the directory does not fit in `size` characters.
 */
GIT_INLINE(int) path__cwd(wchar_t *path, int size)
{
	int len;

	if ((len = GetCurrentDirectoryW(size, path)) == 0) {
		errno = GetLastError() == ERROR_ACCESS_DENIED ? EACCES : ENOENT;
		return -1;
	} else if (len > size) {
		/* A value larger than the buffer is the size that would be needed */
		errno = ENAMETOOLONG;
		return -1;
	}

	/* The Win32 APIs may return "\\?\" once you've used it first.
	 * But it may not.  What a gloriously predictable API!
	 */
	if (wcsncmp(path, PATH__NT_NAMESPACE, PATH__NT_NAMESPACE_LEN))
		return len;

	len -= PATH__NT_NAMESPACE_LEN;

	/* Move the NUL terminator too, so that the string really ends at
	 * `len` rather than keeping a stale tail of the original path.
	 */
	memmove(path, path + PATH__NT_NAMESPACE_LEN, sizeof(wchar_t) * (len + 1));
	return len;
}
53+
54+
/*
 * Step over the first component of `path`: returns a pointer to the
 * character after the first directory separator, or to the terminating
 * NUL when the string contains no separator.
 */
static wchar_t *path__skip_server(wchar_t *path)
{
	wchar_t *cursor = path;

	while (*cursor != L'\0') {
		wchar_t ch = *cursor++;

		if (path__is_dirsep(ch))
			return cursor;
	}

	return cursor;
}
65+
66+
static wchar_t *path__skip_prefix(wchar_t *path)
67+
{
68+
if (path__is_nt_namespace(path)) {
69+
path += PATH__NT_NAMESPACE_LEN;
70+
71+
if (wcsncmp(path, L"UNC\\", 4) == 0)
72+
path = path__skip_server(path + 4);
73+
else if (path__is_absolute(path))
74+
path += PATH__ABSOLUTE_LEN;
75+
} else if (path__is_absolute(path)) {
76+
path += PATH__ABSOLUTE_LEN;
77+
} else if (path__is_unc(path)) {
78+
path = path__skip_server(path + 2);
79+
}
80+
81+
return path;
82+
}
83+
84+
/*
 * Canonicalize a wide-character path in place: turn every '/' into '\',
 * collapse runs of separators, drop "." segments, resolve ".." segments
 * (never backing up into the prefix) and strip trailing separators that
 * follow the prefix.
 *
 * Returns the length of the resulting string in wide characters, not
 * counting the NUL terminator.
 */
int git_win32_path_canonicalize(git_win32_path path)
{
	wchar_t *base, *from, *to, *next;
	size_t len;

	/* `base` is the end of the prefix; `to` is where output is written */
	base = to = path__skip_prefix(path);

	/* Unposixify the prefix */
	for (from = path; from < to; from++) {
		if (*from == L'/')
			*from = L'\\';
	}

	while (*from) {
		/* Find the end of this segment, normalizing its separator */
		for (next = from; *next; ++next) {
			if (*next == L'/') {
				*next = L'\\';
				break;
			}

			if (*next == L'\\')
				break;
		}

		len = next - from;

		if (len == 1 && from[0] == L'.') {
			/* do nothing with singleton dot */
		} else if (len == 2 && from[0] == L'.' && from[1] == L'.') {
			if (to == base) {
				/* no more path segments to strip, eat the "../" */
				if (*next == L'\\')
					len++;

				base = to;
			} else {
				/* back up a path segment */
				while (to > base && to[-1] == L'\\') to--;
				while (to > base && to[-1] != L'\\') to--;
			}
		} else {
			/* keep the separator that ends this segment, if any */
			if (*next == L'\\' && *from != L'\\')
				len++;

			if (to != from)
				memmove(to, from, sizeof(wchar_t) * len);

			to += len;
		}

		from += len;

		/* collapse any further separators */
		while (*from == L'\\') from++;
	}

	/* Strip trailing backslashes */
	while (to > base && to[-1] == L'\\') to--;

	*to = L'\0';

	/* Make the pointer-difference to int conversion explicit */
	return (int)(to - path);
}
147+
148+
/*
 * Write the current working directory into `out`, which has room for
 * `len` wide characters, in the form that follows the "\\?\" prefix:
 * a UNC directory "\\server\share" is rewritten as "UNC\server\share".
 *
 * The function fails unless the buffer can also hold a directory
 * separator and a NUL after the result, so that a caller may append a
 * relative path.
 *
 * Returns the length of the result in wide characters (not counting the
 * NUL terminator), or -1 with errno set (ENAMETOOLONG when the buffer is
 * too small or its size cannot be expressed as an int).
 */
int git_win32_path__cwd(wchar_t *out, size_t len)
{
	int size = (int)len;
	int cwd_len;

	/* path__cwd takes an int; refuse sizes that do not survive conversion */
	if (size < 0 || (size_t)size != len) {
		errno = ENAMETOOLONG;
		return -1;
	}

	if ((cwd_len = path__cwd(out, size)) < 0)
		return -1;

	/* UNC paths */
	if (wcsncmp(L"\\\\", out, 2) == 0) {
		/* Our buffer must be at least 4 characters larger than the
		 * current working directory: we replace the two leading
		 * '\'s with "UNC\" (a net gain of two characters), plus
		 * a trailing directory separator, plus a NUL.
		 */
		if ((size_t)cwd_len + 4 > len) {
			errno = ENAMETOOLONG;
			return -1;
		}

		memmove(out+2, out, sizeof(wchar_t) * cwd_len);
		out[0] = L'U';
		out[1] = L'N';
		out[2] = L'C';

		cwd_len += 2;
	}

	/* Our buffer must be at least 2 characters larger than the current
	 * working directory. (One character for the directory separator,
	 * one for the NUL.)
	 */
	else if ((size_t)cwd_len + 2 > len) {
		errno = ENAMETOOLONG;
		return -1;
	}

	/* The checks above guarantee that this index is inside the buffer */
	out[cwd_len] = L'\0';

	return cwd_len;
}
186+
187+
/*
 * Build a "\\?\"-prefixed wide-character path in `out` from the UTF-8
 * path `src`, then canonicalize it.
 *
 * Each branch below only decides where the converted text goes, how
 * much room is left there and which part of `src` is converted; the
 * conversion itself happens once, after the branches.
 *
 * Returns the value of git_win32_path_canonicalize on success, or -1
 * on failure.
 */
int git_win32_path_from_utf8(git_win32_path out, const char *src)
{
	wchar_t *target = out + PATH__NT_NAMESPACE_LEN;
	const char *tail = src;
	int room = MAX_PATH;

	/* Every path produced here starts with the NT namespace prefix */
	memcpy(out, PATH__NT_NAMESPACE, sizeof(wchar_t) * PATH__NT_NAMESPACE_LEN);

	if (path__is_absolute(src)) {
		/* Drive-letter path: convert it unchanged after the prefix */
	} else if (path__is_nt_namespace(src)) {
		/* Already NT-prefixed: the prefix is in place, skip the source's */
		tail = src + PATH__NT_NAMESPACE_LEN;
	} else if (path__is_unc(src)) {
		/* UNC path: emit "UNC\" and drop the two leading separators */
		memcpy(target, L"UNC\\", sizeof(wchar_t) * 4);
		target += 4;

		tail = src + 2;
		room = MAX_PATH - 2;
	} else if (src[0] == '\\' || src[0] == '/') {
		/* Rooted path without a drive: borrow "C:" from the cwd */
		if (path__cwd(target, MAX_PATH) < 0)
			return -1;

		if (!path__is_absolute(target)) {
			errno = ENOENT;
			return -1;
		}

		target += 2;
		room = MAX_PATH - 2;
	} else {
		/* Relative path: append it to the cwd and a separator */
		int cwd_len = git_win32_path__cwd(target, MAX_PATH);

		if (cwd_len < 0)
			return -1;

		target[cwd_len++] = L'\\';

		target += cwd_len;
		room = MAX_PATH - cwd_len;
	}

	if (git__utf8_to_16(target, room, tail) < 0)
		return -1;

	return git_win32_path_canonicalize(out);
}
244+
245+
/*
 * Convert the wide-character path `src` to a UTF-8 path in `dest`,
 * dropping a leading "\\?\" and turning "\\?\UNC\server\share" back
 * into "\\server\share" before git_path_mkposix is applied to the
 * result.
 *
 * Returns the length of the string in `dest` in bytes (not counting the
 * NUL terminator), or the negative value reported by the conversion on
 * failure.
 */
int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src)
{
	char *out = dest;
	int prefix_len = 0;
	int len;

	/* Strip NT namespacing "\\?\" */
	if (path__is_nt_namespace(src)) {
		src += PATH__NT_NAMESPACE_LEN;

		/* "\\?\UNC\server\share" -> "\\server\share" */
		if (wcsncmp(src, L"UNC\\", 4) == 0) {
			src += 4;

			memcpy(dest, "\\\\", 2);
			prefix_len = 2;
			out = dest + prefix_len;
		}
	}

	/* The prefix already uses part of the buffer, so offer the
	 * conversion only what is left.
	 */
	if ((len = git__utf16_to_8(out, GIT_WIN_PATH_UTF8 - prefix_len, src)) < 0)
		return len;

	git_path_mkposix(dest);

	/* Count the "\\" written above as part of the string */
	return prefix_len + len;
}

src/win32/path_w32.h

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Copyright (C) the libgit2 contributors. All rights reserved.
3+
*
4+
* This file is part of libgit2, distributed under the GNU GPL v2 with
5+
* a Linking Exception. For full terms see the included COPYING file.
6+
*/
7+
#ifndef INCLUDE_git_path_w32_h__
8+
#define INCLUDE_git_path_w32_h__
9+
10+
/*
11+
* Provides a large enough buffer to support Windows paths: MAX_PATH is
12+
* 260, corresponding to a maximum path length of 259 characters plus a
13+
* NULL terminator. Prefixing with "\\?\" adds 4 characters, but if the
14+
* original was a UNC path, then we turn "\\server\share" into
15+
* "\\?\UNC\server\share". So we replace the first two characters with
16+
* 8 characters, a net gain of 6, so the maximum length is MAX_PATH+6.
17+
*/
18+
#define GIT_WIN_PATH_UTF16 MAX_PATH+6
19+
20+
/* Maximum size of a UTF-8 Win32 path.  We remove the "\\?\" or "\\?\UNC\"
 * prefixes for presentation, bringing us back to 259 (non-NUL)
 * characters.  UTF-8 does have 4-byte sequences, but they are encoded in
 * UTF-16 using surrogate pairs, which take up the space of two characters.
 * Two characters in the range U+0800 -> U+FFFF take up more space in UTF-8
 * (6 bytes) than one surrogate pair (4 bytes), so three bytes per UTF-16
 * character, plus one byte for the NUL terminator, is always enough.
 */
27+
#define GIT_WIN_PATH_UTF8 (259 * 3 + 1)
28+
29+
/* Win32 path types */
30+
typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16];
31+
typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8];
32+
33+
/**
34+
* Create a Win32 path (in UCS-2 format) from a UTF-8 string.
35+
*
36+
* @param dest The buffer to receive the wide string.
37+
* @param src The UTF-8 string to convert.
38+
* @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
39+
*/
40+
extern int git_win32_path_from_utf8(git_win32_path dest, const char *src);
41+
42+
/**
43+
* Canonicalize a Win32 UCS-2 path so that it is suitable for delivery to the
44+
* Win32 APIs: remove multiple directory separators, squashing to a single one,
45+
* strip trailing directory separators, ensure directory separators are all
46+
* canonical (always backslashes, never forward slashes) and process any
47+
* directory entries of '.' or '..'.
48+
*
49+
* This processes the buffer in place.
50+
*
51+
* @param path The buffer to process
52+
* @return The new length of the buffer, in wchar_t's (not counting the NULL terminator)
53+
*/
54+
extern int git_win32_path_canonicalize(git_win32_path path);
55+
56+
/**
57+
* Create an internal format (posix-style) UTF-8 path from a Win32 UCS-2 path.
58+
*
59+
* @param dest The buffer to receive the UTF-8 string.
60+
* @param src The wide string to convert.
61+
* @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
62+
*/
63+
extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src);
64+
65+
#endif

src/win32/posix.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include "common.h"
1111
#include "../posix.h"
12+
#include "path_w32.h"
1213
#include "utf-conv.h"
1314
#include "dir.h"
1415

0 commit comments

Comments
 (0)