Skip to content

Commit 86407b6

Browse files
bd1976brisc-rhodes
authored and committed
[Windows][Support] Add helper to expand short 8.3 form paths (#178480)
Windows supports short 8.3 form filenames (for example, `compile_commands.json` -> `COMPIL~1.JSO`) for legacy reasons. See: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#short-vs-long-names Such paths are not unusual because, on Windows, the system temporary directory is commonly derived from the `TMP`/`TEMP` environment variables. For historical compatibility reasons, these variables are often set to short 8.3 form paths on systems where user names exceed eight characters. Introduce `windows::makeLongFormPath()` to convert paths to their long form by expanding any 8.3 components via `GetLongPathNameW`. As part of this change: - Extended-length path prefix handling is centralized by adding `stripLongPathPrefix()` and reusing it in `realPathFromHandle()`. - `widenPath()` is cleaned up to use shared prefix constants. This was split out from #178303 at the request of the codeowner so that the Windows support parts can be reviewed separately. (cherry picked from commit e6f5e49)
1 parent affc059 commit 86407b6

File tree

3 files changed

+241
-15
lines changed

3 files changed

+241
-15
lines changed

llvm/include/llvm/Support/Windows/WindowsSupport.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,11 @@ LLVM_ABI std::error_code widenPath(const Twine &Path8,
249249
/// ensuring we're not retrieving a malicious injected module but a module
250250
/// loaded from the system path.
251251
LLVM_ABI HMODULE loadSystemModuleSecure(LPCWSTR lpModuleName);
252+
253+
/// Convert a UTF-8 path to a long form UTF-8 path expanding any short 8.3 form
254+
/// components.
///
/// \param Path8 the UTF-8 path to expand.
/// \param Result8 receives the expanded UTF-8 path on success.
/// \returns a default-constructed error code on success; otherwise the error
/// reported while converting or expanding the path.
255+
LLVM_ABI std::error_code makeLongFormPath(const Twine &Path8,
256+
llvm::SmallVectorImpl<char> &Result8);
252257
} // end namespace windows
253258
} // end namespace sys
254259
} // end namespace llvm.

llvm/lib/Support/Windows/Path.inc

Lines changed: 77 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,34 @@ static bool is_separator(const wchar_t value) {
6060
}
6161
}
6262

63+
// Long path prefix constants (UTF-8).
64+
static constexpr llvm::StringLiteral LongPathPrefix8 = R"(\\?\)";
65+
static constexpr llvm::StringLiteral LongPathUNCPrefix8 = R"(\\?\UNC\)";
66+
67+
// Long path prefix constants (UTF-16).
68+
static constexpr wchar_t LongPathPrefix16[] = LR"(\\?\)";
69+
static constexpr wchar_t LongPathUNCPrefix16[] = LR"(\\?\UNC\)";
70+
71+
// Prefix lengths in wchar_t units. std::size() counts the terminating null of
// the array, so one is subtracted to get the number of prefix characters.
static constexpr DWORD LongPathPrefix16Len =
72+
static_cast<DWORD>(std::size(LongPathPrefix16) - 1);
73+
static constexpr DWORD LongPathUNCPrefix16Len =
74+
static_cast<DWORD>(std::size(LongPathUNCPrefix16) - 1);
75+
76+
static void stripLongPathPrefix(wchar_t *&Data, DWORD &CountChars) {
77+
if (CountChars >= LongPathUNCPrefix16Len &&
78+
::wmemcmp(Data, LongPathUNCPrefix16, LongPathUNCPrefix16Len) == 0) {
79+
// Convert \\?\UNC\foo\bar to \\foo\bar
80+
CountChars -= 6;
81+
Data += 6;
82+
Data[0] = L'\\';
83+
} else if (CountChars >= LongPathPrefix16Len &&
84+
::wmemcmp(Data, LongPathPrefix16, LongPathPrefix16Len) == 0) {
85+
// Convert \\?\C:\foo to C:\foo
86+
CountChars -= 4;
87+
Data += 4;
88+
}
89+
}
90+
6391
namespace llvm {
6492
namespace sys {
6593
namespace windows {
@@ -95,10 +123,8 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
95123
return mapWindowsError(::GetLastError());
96124
}
97125

98-
const char *const LongPathPrefix = "\\\\?\\";
99-
100126
if ((Path16.size() + CurPathLen) < MaxPathLen ||
101-
Path8Str.starts_with(LongPathPrefix))
127+
Path8Str.starts_with(LongPathPrefix8))
102128
return std::error_code();
103129

104130
if (!IsAbsolute) {
@@ -116,17 +142,62 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
116142
assert(!RootName.empty() &&
117143
"Root name cannot be empty for an absolute path!");
118144

119-
SmallString<2 * MAX_PATH> FullPath(LongPathPrefix);
145+
SmallString<2 * MAX_PATH> FullPath;
120146
if (RootName[1] != ':') { // Check if UNC.
121-
FullPath.append("UNC\\");
147+
FullPath.append(LongPathUNCPrefix8);
122148
FullPath.append(Path8Str.begin() + 2, Path8Str.end());
123149
} else {
150+
FullPath.append(LongPathPrefix8);
124151
FullPath.append(Path8Str);
125152
}
126153

127154
return UTF8ToUTF16(FullPath, Path16);
128155
}
129156

157+
std::error_code makeLongFormPath(const Twine &Path8,
158+
llvm::SmallVectorImpl<char> &Result8) {
159+
SmallString<128> PathStorage;
160+
StringRef PathStr = Path8.toStringRef(PathStorage);
161+
bool HadPrefix = PathStr.starts_with(LongPathPrefix8);
162+
163+
SmallVector<wchar_t, 128> Path16;
164+
if (std::error_code EC = widenPath(PathStr, Path16))
165+
return EC;
166+
167+
// Start with a buffer equal to input.
168+
llvm::SmallVector<wchar_t, 128> Long16;
169+
DWORD Len = static_cast<DWORD>(Path16.size());
170+
171+
// Loop instead of a double call to be defensive against TOCTOU races.
172+
do {
173+
Long16.resize_for_overwrite(Len);
174+
175+
Len = ::GetLongPathNameW(Path16.data(), Long16.data(), Len);
176+
177+
// A zero return value indicates a failure other than insufficient space.
178+
if (Len == 0)
179+
return mapWindowsError(::GetLastError());
180+
181+
// If there's insufficient space, the return value is the required size in
182+
// characters *including* the null terminator, and therefore greater than
183+
// the buffer size we provided. Equality would imply success with no room
184+
// for the terminator and should not occur for this API.
185+
assert(Len != Long16.size());
186+
} while (Len > Long16.size());
187+
188+
// On success, GetLongPathNameW returns the number of characters not
189+
// including the null-terminator.
190+
Long16.truncate(Len);
191+
192+
// Strip \\?\ or \\?\UNC\ long length prefix if it wasn't part of the
193+
// original path.
194+
wchar_t *Data = Long16.data();
195+
if (!HadPrefix)
196+
stripLongPathPrefix(Data, Len);
197+
198+
return sys::windows::UTF16ToUTF8(Data, Len, Result8);
199+
}
200+
130201
} // end namespace windows
131202

132203
namespace fs {
@@ -407,16 +478,7 @@ static std::error_code realPathFromHandle(HANDLE H,
407478
// paths don't get canonicalized by file APIs.
408479
wchar_t *Data = Buffer.data();
409480
DWORD CountChars = Buffer.size();
410-
if (CountChars >= 8 && ::memcmp(Data, L"\\\\?\\UNC\\", 16) == 0) {
411-
// Convert \\?\UNC\foo\bar to \\foo\bar
412-
CountChars -= 6;
413-
Data += 6;
414-
Data[0] = '\\';
415-
} else if (CountChars >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) {
416-
// Convert \\?\c:\foo to c:\foo
417-
CountChars -= 4;
418-
Data += 4;
419-
}
481+
stripLongPathPrefix(Data, CountChars);
420482

421483
// Convert the result from UTF-16 to UTF-8.
422484
if (std::error_code EC = UTF16ToUTF8(Data, CountChars, RealPath))

llvm/unittests/Support/Path.cpp

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
#include "llvm/ADT/ArrayRef.h"
3333
#include "llvm/Support/Chrono.h"
3434
#include "llvm/Support/Windows/WindowsSupport.h"
35+
#include "llvm/Support/WindowsError.h"
36+
#include <fileapi.h>
3537
#include <windows.h>
3638
#include <winerror.h>
3739
#endif
@@ -2485,6 +2487,163 @@ TEST_F(FileSystemTest, widenPath) {
24852487
#endif
24862488

24872489
#ifdef _WIN32
2490+
/// Checks whether short 8.3 form names are enabled in the given UTF-8 path.
2491+
static llvm::Expected<bool> areShortNamesEnabled(llvm::StringRef Path8) {
2492+
// Create a directory under Path8 with a name long enough that Windows will
2493+
// provide a short 8.3 form name, if short 8.3 form names are enabled.
2494+
SmallString<MAX_PATH> Dir(Path8);
2495+
path::append(Dir, "verylongdir");
2496+
if (std::error_code EC = fs::create_directories(Dir))
2497+
return llvm::errorCodeToError(EC);
2498+
scope_exit Close([&] { fs::remove_directories(Dir); });
2499+
2500+
SmallVector<wchar_t, MAX_PATH> Path16;
2501+
if (std::error_code EC = sys::windows::widenPath(Dir, Path16))
2502+
return llvm::errorCodeToError(EC);
2503+
2504+
WIN32_FIND_DATAW Data;
2505+
HANDLE H = ::FindFirstFileW(Path16.data(), &Data);
2506+
if (H == INVALID_HANDLE_VALUE)
2507+
return llvm::errorCodeToError(llvm::mapWindowsError(::GetLastError()));
2508+
::FindClose(H);
2509+
2510+
return (Data.cAlternateFileName[0] != L'\0');
2511+
}
2512+
2513+
/// Returns the short 8.3 form path for the given UTF-8 path, or an empty string
2514+
/// on failure. Uses Win32 GetShortPathNameW.
2515+
static std::string getShortPathName(llvm::StringRef Path8) {
2516+
// Convert UTF-8 to UTF-16.
2517+
SmallVector<wchar_t, MAX_PATH> Path16;
2518+
if (std::error_code EC = sys::windows::widenPath(Path8, Path16))
2519+
return {};
2520+
2521+
// Get the required buffer size for the short 8.3 form path (includes null
2522+
// terminator).
2523+
DWORD Required = ::GetShortPathNameW(Path16.data(), nullptr, 0);
2524+
if (Required == 0)
2525+
return {};
2526+
2527+
SmallVector<wchar_t, MAX_PATH> ShortPath;
2528+
ShortPath.resize_for_overwrite(Required);
2529+
2530+
DWORD Written =
2531+
::GetShortPathNameW(Path16.data(), ShortPath.data(), Required);
2532+
if (Written == 0 || Written >= Required)
2533+
return {};
2534+
2535+
ShortPath.truncate(Written);
2536+
2537+
SmallString<MAX_PATH> Utf8Result;
2538+
if (std::error_code EC = sys::windows::UTF16ToUTF8(
2539+
ShortPath.data(), ShortPath.size(), Utf8Result))
2540+
return {};
2541+
2542+
return std::string(Utf8Result);
2543+
}
2544+
2545+
/// Returns true if the two paths refer to the same file or directory by
2546+
/// comparing their UniqueIDs.
2547+
static bool sameEntity(llvm::StringRef P1, llvm::StringRef P2) {
2548+
fs::UniqueID ID1, ID2;
2549+
return !fs::getUniqueID(P1, ID1) && !fs::getUniqueID(P2, ID2) && ID1 == ID2;
2550+
}
2551+
2552+
/// Removes the Windows long path path prefix (\\?\ or \\?\UNC\) from the given
2553+
/// UTF-8 path, if present.
2554+
static std::string stripPrefix(llvm::StringRef P) {
2555+
if (P.starts_with(R"(\\?\UNC\)"))
2556+
return "\\" + P.drop_front(7).str();
2557+
if (P.starts_with(R"(\\?\)"))
2558+
return P.drop_front(4).str();
2559+
return P.str();
2560+
}
2561+
2562+
/// Checks windows::makeLongFormPath() against short 8.3 form paths produced by
/// the file system: a non-existent path, a single-level path, a path with '.'
/// and '..' components, and a deep path with and without the long path prefix.
/// The test is skipped when short names are not enabled in the test directory.
TEST_F(FileSystemTest, makeLongFormPath) {
2563+
auto Enabled = areShortNamesEnabled(TestDirectory.str());
2564+
ASSERT_TRUE(static_cast<bool>(Enabled))
2565+
<< llvm::toString(Enabled.takeError());
2566+
if (!*Enabled)
2567+
GTEST_SKIP() << "Short 8.3 form names not enabled in: " << TestDirectory;
2568+
2569+
// Setup: A test directory longer than 8 characters for which a distinct
2570+
// short 8.3 form name will be created on Windows. Typically, 123456~1.
2571+
constexpr const char *OneDir = "\\123456789"; // >8 chars
2572+
2573+
// Setup: Create a path where even if all components were reduced to short 8.3
2574+
// form names, the total length would exceed MAX_PATH.
// Assuming 8-character short names such as 123456~1, every level still
// contributes 9 characters (separator included), so NLevels levels are enough.
2575+
SmallString<MAX_PATH * 2> Deep(TestDirectory);
2576+
const size_t NLevels = (MAX_PATH / 8) + 1;
2577+
for (size_t I = 0; I < NLevels; ++I)
2578+
Deep.append(OneDir);
2579+
2580+
ASSERT_NO_ERROR(fs::create_directories(Deep));
2581+
2582+
// Setup: Create prefixed and non-prefixed short 8.3 form paths from the deep
2583+
// test path we just created.
2584+
std::string DeepShortWithPrefix = getShortPathName(Deep);
2585+
ASSERT_TRUE(StringRef(DeepShortWithPrefix).starts_with(R"(\\?\)"))
2586+
<< "Expected prefixed short 8.3 form path, got: " << DeepShortWithPrefix;
2587+
std::string DeepShort = stripPrefix(DeepShortWithPrefix);
2588+
2589+
// Setup: Create a short 8.3 form path for the first-level directory.
2590+
SmallString<MAX_PATH> FirstLevel(TestDirectory);
2591+
FirstLevel.append(OneDir);
2592+
std::string Short = getShortPathName(FirstLevel);
2593+
ASSERT_FALSE(Short.empty())
2594+
<< "Expected short 8.3 form path for test directory.";
2595+
2596+
// Setup: Create a short 8.3 form path with . and .. components for the
2597+
// first-level directory.
2598+
llvm::SmallString<MAX_PATH> WithDots(FirstLevel);
2599+
llvm::sys::path::append(WithDots, ".", "..", OneDir);
2600+
std::string DotAndDotDot = getShortPathName(WithDots);
2601+
ASSERT_FALSE(DotAndDotDot.empty())
2602+
<< "Expected short 8.3 form path for test directory.";
2603+
auto ContainsDotAndDotDot = [](llvm::StringRef S) {
2604+
return S.contains("\\.\\") && S.contains("\\..\\");
2605+
};
2606+
ASSERT_TRUE(ContainsDotAndDotDot(DotAndDotDot))
2607+
<< "Expected '.' and '..' components in: " << DotAndDotDot;
2608+
2609+
// Case 1: Non-existent short 8.3 form path.
2610+
SmallString<MAX_PATH> NoExist("NotEre~1");
2611+
ASSERT_FALSE(fs::exists(NoExist));
2612+
SmallString<MAX_PATH> NoExistResult;
2613+
EXPECT_TRUE(windows::makeLongFormPath(NoExist, NoExistResult));
// The output buffer started empty and must still be empty after the failure.
2614+
EXPECT_TRUE(NoExistResult.empty());
2615+
2616+
// Case 2: Valid short 8.3 form path.
2617+
SmallString<MAX_PATH> ShortResult;
2618+
ASSERT_FALSE(windows::makeLongFormPath(Short, ShortResult));
2619+
EXPECT_TRUE(sameEntity(Short, ShortResult));
2620+
2621+
// Case 3: Valid . and .. short 8.3 form path.
2622+
SmallString<MAX_PATH> DotAndDotDotResult;
2623+
ASSERT_FALSE(windows::makeLongFormPath(DotAndDotDot, DotAndDotDotResult));
2624+
EXPECT_TRUE(sameEntity(DotAndDotDot, DotAndDotDotResult));
2625+
// Assert that '.' and '..' remain as path components.
2626+
ASSERT_TRUE(ContainsDotAndDotDot(DotAndDotDotResult));
2627+
2628+
// Case 4: Deep short 8.3 form path without \\?\ prefix.
2629+
SmallString<MAX_PATH> DeepResult;
2630+
ASSERT_FALSE(windows::makeLongFormPath(DeepShort, DeepResult));
2631+
EXPECT_TRUE(sameEntity(DeepShort, DeepResult));
2632+
EXPECT_FALSE(StringRef(DeepResult).starts_with(R"(\\?\)"))
2633+
<< "Expected unprefixed result, got: " << DeepResult;
2634+
2635+
// Case 5: Deep short 8.3 form path with \\?\ prefix.
2636+
SmallString<MAX_PATH> DeepPrefixedResult;
2637+
ASSERT_FALSE(
2638+
windows::makeLongFormPath(DeepShortWithPrefix, DeepPrefixedResult));
2639+
EXPECT_TRUE(sameEntity(DeepShortWithPrefix, DeepPrefixedResult));
2640+
EXPECT_TRUE(StringRef(DeepPrefixedResult).starts_with(R"(\\?\)"))
2641+
<< "Expected prefixed result, got: " << DeepPrefixedResult;
2642+
2643+
// Cleanup.
2644+
ASSERT_NO_ERROR(fs::remove_directories(TestDirectory.str()));
2645+
}
2646+
24882647
// Windows refuses lock request if file region is already locked by the same
24892648
// process. POSIX system in this case updates the existing lock.
24902649
TEST_F(FileSystemTest, FileLocker) {

0 commit comments

Comments
 (0)