Merge pull request #7 from eirikbakke/unicodeLaunching
Make the Windows launcher work with Unicode paths
diff --git a/src/main/cpp/bootstrap/nbexec.exe.manifest b/src/main/cpp/bootstrap/nbexec.exe.manifest
index cfc9190..580bb41 100644
--- a/src/main/cpp/bootstrap/nbexec.exe.manifest
+++ b/src/main/cpp/bootstrap/nbexec.exe.manifest
@@ -48,6 +48,12 @@
</requestedPrivileges>
</security>
</trustInfo>
+<!-- See https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page -->
+<application>
+ <windowsSettings>
+ <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+ </windowsSettings>
+</application>
<!-- NETBEANS-1227: Indicate the same HiDPI capabilities as javaw.exe from JDK 11. -->
<asmv3:application xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
<asmv3:windowsSettings xmlns:dpi1="http://schemas.microsoft.com/SMI/2005/WindowsSettings" xmlns:dpi2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
diff --git a/src/main/cpp/bootstrap/utilsfuncs.cpp b/src/main/cpp/bootstrap/utilsfuncs.cpp
index 2902b1e..16c6ce0 100644
--- a/src/main/cpp/bootstrap/utilsfuncs.cpp
+++ b/src/main/cpp/bootstrap/utilsfuncs.cpp
@@ -276,6 +276,19 @@
return true;
}
+void setConsoleCodepage() {
+    /* The console has its own code page (e.g. 437 on US Windows, while the rest of the system uses
+       1252). The launcher itself runs with UTF-8 (set in the application manifest), so switch the
+       console to UTF-8 as well; Unicode text, e.g. paths in log messages, then prints correctly. If
+       we attached to an existing console, the change persists after exit. Failures are only logged. */
+    if (!SetConsoleOutputCP(CP_UTF8)) {
+        logErr(true, false, "SetConsoleOutputCP(CP_UTF8) failed.");
+    }
+    if (!SetConsoleCP(CP_UTF8)) {
+        logErr(true, false, "SetConsoleCP(CP_UTF8) failed.");
+    }
+}
+
bool setupProcess(int &argc, char *argv[], DWORD &parentProcID, const char *attachMsg) {
#define CHECK_ARG \
if (i+1 == argc) {\
@@ -290,6 +303,7 @@
CHECK_ARG;
if (strcmp("new", argv[i + 1]) == 0){
AllocConsole();
+ setConsoleCodepage();
} else if (strcmp("suppress", argv[i + 1]) == 0) {
// nothing, no console should be attached
} else {
@@ -332,6 +346,7 @@
logErr(true, false, "AttachConsole of PP failed.");
} else {
getParentProcessID(parentProcID);
+ setConsoleCodepage();
if (attachMsg) {
printToConsole(attachMsg);
}
diff --git a/src/main/cpp/harness/app.exe.manifest b/src/main/cpp/harness/app.exe.manifest
index 26921b3..c1843b2 100644
--- a/src/main/cpp/harness/app.exe.manifest
+++ b/src/main/cpp/harness/app.exe.manifest
@@ -48,6 +48,12 @@
</requestedPrivileges>
</security>
</trustInfo>
+<!-- See https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page -->
+<application>
+ <windowsSettings>
+ <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+ </windowsSettings>
+</application>
<!-- NETBEANS-1227: Indicate the same HiDPI capabilities as javaw.exe from JDK 11. -->
<asmv3:application xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
<asmv3:windowsSettings xmlns:dpi1="http://schemas.microsoft.com/SMI/2005/WindowsSettings" xmlns:dpi2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
diff --git a/src/main/cpp/ide/nblauncher.cpp b/src/main/cpp/ide/nblauncher.cpp
index 2bd940e..393e2cd 100644
--- a/src/main/cpp/ide/nblauncher.cpp
+++ b/src/main/cpp/ide/nblauncher.cpp
@@ -25,6 +25,7 @@
#endif
#include <shlobj.h>
+#include <winnls.h>
#include "nblauncher.h"
#include "../bootstrap/utilsfuncs.h"
#include "../bootstrap/argnames.h"
@@ -157,6 +158,20 @@
return loader.start(nbexecPath.c_str(), newArgs.getCount(), newArgs.getArgs());
}
+UINT GetAnsiCodePageForLocale(LCID lcid) {
+    /* Returns the default ANSI code page of the given locale, or 0 if GetLocaleInfo does not
+       report the expected size. LOCALE_RETURN_NUMBER requests the value as a number, with the
+       buffer size in TCHARs. See https://devblogs.microsoft.com/oldnewthing/20161007-00/?p=94475 */
+    UINT codePage = 0;
+    const int bufferChars = sizeof(codePage) / sizeof(TCHAR);
+    const DWORD infoType = LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER;
+    LPTSTR buffer = reinterpret_cast<LPTSTR>(&codePage);
+    if (GetLocaleInfo(lcid, infoType, buffer, bufferChars) == bufferChars) {
+        return codePage;
+    }
+    return 0;
+}
+
bool NbLauncher::initBaseNames() {
char path[MAX_PATH] = "";
getCurrentModulePath(path, MAX_PATH);
@@ -181,49 +196,23 @@
}
*bslash = '\0';
+ /* Useful messages for debugging character set issues. On Java versions where
+ https://bugs.openjdk.org/browse/JDK-8272352 has been fixed, NetBeans should now run fine when
+ there are Unicode characters in the NetBeans installation path, the JDK path, the user/cache
+ directory paths, or in the java.io.tmpdir path (the latter sometimes being a problem for JNA,
+ which is used by FlatLaf). Since the JVM is started in-process via JNI, the Java environment
+ will inherit the UTF-8 code page setting that we have set in the launcher's application
+ manifest, without requiring the user to change regional settings in the Control Panel. (JEP 400
+ might eventually do something similar for the java.exe/javaw.exe executables. See
+ https://www.mail-archive.com/core-libs-dev@openjdk.java.net/msg80489.html .) */
+ logMsg("ANSI code page per GetACP() : %d", GetACP());
+ logMsg("ANSI code page per GetConsoleCP() : %d", GetConsoleCP());
+ logMsg("ANSI code page for GetThreadLocale() : %d", GetAnsiCodePageForLocale(GetThreadLocale()));
+ logMsg("ANSI code page for GetUserDefaultLCID() : %d", GetAnsiCodePageForLocale(GetUserDefaultLCID()));
+ logMsg("ANSI code page for GetSystemDefaultLCID(): %d", GetAnsiCodePageForLocale(GetSystemDefaultLCID()));
+
baseDir = path;
-
- /* The JavaVMOption.optionString interface forces us to stick to ANSI
- strings only, using whichever codepage is the default on the current Windows
- installation (e.g. windows-1252 for US Windows). For any Unicode characters
- that cannot be encoded using the current ANSI codepage, Win32 functions
- such as GetModuleFileName (used by getCurrentModulePath) and
- GetCurrentDirectory will substitute a question mark, which we detect here.
- Note that the ANSI codepage is a superset of ASCII; it can accomodate a
- limited selection of international characters that Microsoft once considered
- appropriate for the current Windows locale.
- It would be easy enough to switch the launcher process to UTF-8 everywhere;
- this can be configured from the manifest file
- (see https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page ).
- String types in these sources could remain as "char *" rather than
- wchar_t. The problem is that JNI will still seems to expect parameters to be
- passed using the default Windows codepage.
-
- I tried setting UTF8 in the manifests and using the --fork-java parameter
- to use the old CreateProcess launcher rather than JNI, but this still
- causes a "Could not find or load main class org.netbeans.Main" error. I
- also tried doing MultiByteToWideChar from UTF8 to wchar_t and calling
- CreateProcessW; it does not fix the problem even though changing the command
- line to prefix "cmd /c echo" causes my Cyrillic test character to show up
- correctly on the Windows command line.
-
- Other approaches which were attempted, but demeed too fragile:
- 1) Set the current directory to baseDir and pass relative paths only.
- (Still led to ClassNotFoundException from ProxyClassLoader, which would
- have needed to be fixed. And doesn't work e.g. for the home directory,
- e.g. if the username itself has problematic characters in it.)
- 2) Using the GetShortPathNameW function to get an equivalent
- Windows 95 style "8.3" compatibility path (e.g. "C:\Users\CHARTE~1").
- This worked, but is too likely to create problems down the line.
- */
- for (size_t i = 0; i < baseDir.size(); ++i) {
- if (baseDir[i] == '?') {
- logErr(false, true, "Cannot run in this folder; the path \"%s\" contains problematic characters.", path);
- return false;
- }
- }
-
logMsg("Base dir: %s", baseDir.c_str());
return true;
}
diff --git a/src/main/cpp/ide/netbeans.exe.manifest b/src/main/cpp/ide/netbeans.exe.manifest
index 71b1164..2dda7fb 100644
--- a/src/main/cpp/ide/netbeans.exe.manifest
+++ b/src/main/cpp/ide/netbeans.exe.manifest
@@ -48,6 +48,12 @@
</requestedPrivileges>
</security>
</trustInfo>
+<!-- See https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page -->
+<application>
+ <windowsSettings>
+ <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+ </windowsSettings>
+</application>
<!-- NETBEANS-1227: Indicate the same HiDPI capabilities as javaw.exe from JDK 11. -->
<!-- Note that even 32-bit Java 10.0.2 indicates HiDPI-awareness, so it should
be fine to include it here as well. -->
diff --git a/src/main/cpp/ide/netbeans64.exe.manifest b/src/main/cpp/ide/netbeans64.exe.manifest
index 3f7dc6e..b1d9a5f 100644
--- a/src/main/cpp/ide/netbeans64.exe.manifest
+++ b/src/main/cpp/ide/netbeans64.exe.manifest
@@ -50,6 +50,12 @@
</requestedPrivileges>
</security>
</trustInfo>
+<!-- See https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page -->
+<application>
+ <windowsSettings>
+ <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+ </windowsSettings>
+</application>
<!-- NETBEANS-1227: Indicate the same HiDPI capabilities as javaw.exe from JDK 11. -->
<asmv3:application xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
<asmv3:windowsSettings xmlns:dpi1="http://schemas.microsoft.com/SMI/2005/WindowsSettings" xmlns:dpi2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">