Merge pull request #7 from eirikbakke/unicodeLaunching

Make the Windows launcher work with Unicode paths
diff --git a/src/main/cpp/bootstrap/nbexec.exe.manifest b/src/main/cpp/bootstrap/nbexec.exe.manifest
index cfc9190..580bb41 100644
--- a/src/main/cpp/bootstrap/nbexec.exe.manifest
+++ b/src/main/cpp/bootstrap/nbexec.exe.manifest
@@ -48,6 +48,12 @@
       </requestedPrivileges>
      </security>
 </trustInfo>
+<!-- See https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page -->
+<application>
+  <windowsSettings>
+    <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+  </windowsSettings>
+</application>
 <!-- NETBEANS-1227: Indicate the same HiDPI capabilities as javaw.exe from JDK 11. -->
 <asmv3:application xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
   <asmv3:windowsSettings xmlns:dpi1="http://schemas.microsoft.com/SMI/2005/WindowsSettings" xmlns:dpi2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
diff --git a/src/main/cpp/bootstrap/utilsfuncs.cpp b/src/main/cpp/bootstrap/utilsfuncs.cpp
index 2902b1e..16c6ce0 100644
--- a/src/main/cpp/bootstrap/utilsfuncs.cpp
+++ b/src/main/cpp/bootstrap/utilsfuncs.cpp
@@ -276,6 +276,19 @@
     return true;
 }
 
+void setConsoleCodepage() {
+    /* The Windows console (cmd) has its own code page setting that's usually different from the
+    system and user code page, e.g. on US Windows the console will use code page 437 while the
+    rest of the system uses 1252. Setting the console code page here to UTF-8 makes Unicode
+    characters printed from the application appear correctly. Since the launcher itself also runs
+    with UTF-8 as its code page (specified in the application manifest), this also makes log
+    messages from the launcher appear correctly, e.g. when printing paths that may have Unicode
+    characters in them. Note that if we attached to an existing console, the modified code page
+    setting will persist after the launcher exits. */
+    SetConsoleOutputCP(CP_UTF8);  // Console output code page; the BOOL result is not checked.
+    SetConsoleCP(CP_UTF8);        // Console input code page; the BOOL result is not checked.
+}
+
 bool setupProcess(int &argc, char *argv[], DWORD &parentProcID, const char *attachMsg) {
 #define CHECK_ARG \
     if (i+1 == argc) {\
@@ -290,6 +303,7 @@
             CHECK_ARG;
             if (strcmp("new", argv[i + 1]) == 0){
                 AllocConsole();
+                setConsoleCodepage();
             } else if (strcmp("suppress", argv[i + 1]) == 0) {
                 // nothing, no console should be attached
             } else {
@@ -332,6 +346,7 @@
                     logErr(true, false, "AttachConsole of PP failed.");
                 } else {
                     getParentProcessID(parentProcID);
+                    setConsoleCodepage();
                     if (attachMsg) {
                         printToConsole(attachMsg);
                     }
diff --git a/src/main/cpp/harness/app.exe.manifest b/src/main/cpp/harness/app.exe.manifest
index 26921b3..c1843b2 100644
--- a/src/main/cpp/harness/app.exe.manifest
+++ b/src/main/cpp/harness/app.exe.manifest
@@ -48,6 +48,12 @@
       </requestedPrivileges>
      </security>
 </trustInfo>
+<!-- See https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page -->
+<application>
+  <windowsSettings>
+    <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+  </windowsSettings>
+</application>
 <!-- NETBEANS-1227: Indicate the same HiDPI capabilities as javaw.exe from JDK 11. -->
 <asmv3:application xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
   <asmv3:windowsSettings xmlns:dpi1="http://schemas.microsoft.com/SMI/2005/WindowsSettings" xmlns:dpi2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
diff --git a/src/main/cpp/ide/nblauncher.cpp b/src/main/cpp/ide/nblauncher.cpp
index 2bd940e..393e2cd 100644
--- a/src/main/cpp/ide/nblauncher.cpp
+++ b/src/main/cpp/ide/nblauncher.cpp
@@ -25,6 +25,7 @@
 #endif
 
 #include <shlobj.h>
+#include <winnls.h>
 #include "nblauncher.h"
 #include "../bootstrap/utilsfuncs.h"
 #include "../bootstrap/argnames.h"
@@ -157,6 +158,20 @@
     return loader.start(nbexecPath.c_str(), newArgs.getCount(), newArgs.getArgs());
 }
 
+UINT GetAnsiCodePageForLocale(LCID lcid) {  // Default ANSI code page of lcid, or 0 on failure.
+    // See https://devblogs.microsoft.com/oldnewthing/20161007-00/?p=94475
+    UINT acp;  // Left uninitialized; only returned after GetLocaleInfo reports it was filled.
+    int sizeInChars = sizeof(acp) / sizeof(TCHAR);  // Size of acp expressed in TCHARs.
+    if (GetLocaleInfo(lcid,
+                      LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,  // Ask for a number, not text.
+                      reinterpret_cast<LPTSTR>(&acp),  // The numeric result is written into acp's storage.
+                      sizeInChars) != sizeInChars)  // Any other return value is treated as failure.
+    {
+        return 0;
+    }
+    return acp;
+}
+
 bool NbLauncher::initBaseNames() {
     char path[MAX_PATH] = "";
     getCurrentModulePath(path, MAX_PATH);
@@ -181,49 +196,23 @@
     }
     *bslash = '\0';        
 
+    /* Useful messages for debugging character set issues. On Java versions where
+    https://bugs.openjdk.org/browse/JDK-8272352 has been fixed, NetBeans should now run fine when
+    there are Unicode characters in the NetBeans installation path, the JDK path, the user/cache
+    directory paths, or in the java.io.tmpdir path (the latter sometimes being a problem for JNA,
+    which is used by FlatLaf). Since the JVM is started in-process via JNI, the Java environment
+    will inherit the UTF-8 code page setting that we have set in the launcher's application
+    manifest, without requiring the user to change regional settings in the Control Panel. (JEP 400
+    might eventually do something similar for the java.exe/javaw.exe executables. See
+    https://www.mail-archive.com/core-libs-dev@openjdk.java.net/msg80489.html .) */
+    logMsg("ANSI code page per GetACP()              : %d", GetACP());
+    logMsg("ANSI code page per GetConsoleCP()        : %d", GetConsoleCP());
+    logMsg("ANSI code page for GetThreadLocale()     : %d", GetAnsiCodePageForLocale(GetThreadLocale()));
+    logMsg("ANSI code page for GetUserDefaultLCID()  : %d", GetAnsiCodePageForLocale(GetUserDefaultLCID()));
+    logMsg("ANSI code page for GetSystemDefaultLCID(): %d", GetAnsiCodePageForLocale(GetSystemDefaultLCID()));
+
     baseDir = path;
-    
-    /* The JavaVMOption.optionString interface forces us to stick to ANSI
-    strings only, using whichever codepage is the default on the current Windows
-    installation (e.g. windows-1252 for US Windows). For any Unicode characters
-    that cannot be encoded using the current ANSI codepage, Win32 functions
-    such as GetModuleFileName (used by getCurrentModulePath) and
-    GetCurrentDirectory will substitute a question mark, which we detect here.
-    Note that the ANSI codepage is a superset of ASCII; it can accomodate a
-    limited selection of international characters that Microsoft once considered
-    appropriate for the current Windows locale.
 
-    It would be easy enough to switch the launcher process to UTF-8 everywhere;
-    this can be configured from the manifest file
-    (see https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page ).
-    String types in these sources could remain as "char *" rather than
-    wchar_t. The problem is that JNI will still seems to expect parameters to be
-    passed using the default Windows codepage.
-
-    I tried setting UTF8 in the manifests and using the --fork-java parameter
-    to use the old CreateProcess launcher rather than JNI, but this still
-    causes a "Could not find or load main class org.netbeans.Main" error. I
-    also tried doing MultiByteToWideChar from UTF8 to wchar_t and calling
-    CreateProcessW; it does not fix the problem even though changing the command
-    line to prefix "cmd /c echo" causes my Cyrillic test character to show up
-    correctly on the Windows command line.
-
-    Other approaches which were attempted, but demeed too fragile:
-    1) Set the current directory to baseDir and pass relative paths only.
-       (Still led to ClassNotFoundException from ProxyClassLoader, which would
-       have needed to be fixed. And doesn't work e.g. for the home directory,
-       e.g. if the username itself has problematic characters in it.)
-    2) Using the GetShortPathNameW function to get an equivalent
-       Windows 95 style "8.3" compatibility path (e.g. "C:\Users\CHARTE~1").
-       This worked, but is too likely to create problems down the line.
-    */
-    for (size_t i = 0; i < baseDir.size(); ++i) {
-        if (baseDir[i] == '?') {
-            logErr(false, true, "Cannot run in this folder; the path \"%s\" contains problematic characters.", path);
-            return false;
-        }
-    }
-    
     logMsg("Base dir: %s", baseDir.c_str());
     return true;
 }
diff --git a/src/main/cpp/ide/netbeans.exe.manifest b/src/main/cpp/ide/netbeans.exe.manifest
index 71b1164..2dda7fb 100644
--- a/src/main/cpp/ide/netbeans.exe.manifest
+++ b/src/main/cpp/ide/netbeans.exe.manifest
@@ -48,6 +48,12 @@
       </requestedPrivileges>
      </security>
 </trustInfo>
+<!-- See https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page -->
+<application>
+  <windowsSettings>
+    <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+  </windowsSettings>
+</application>
 <!-- NETBEANS-1227: Indicate the same HiDPI capabilities as javaw.exe from JDK 11. -->
 <!-- Note that even 32-bit Java 10.0.2 indicates HiDPI-awareness, so it should
      be fine to include it here as well. -->
diff --git a/src/main/cpp/ide/netbeans64.exe.manifest b/src/main/cpp/ide/netbeans64.exe.manifest
index 3f7dc6e..b1d9a5f 100644
--- a/src/main/cpp/ide/netbeans64.exe.manifest
+++ b/src/main/cpp/ide/netbeans64.exe.manifest
@@ -50,6 +50,12 @@
       </requestedPrivileges>
      </security>
 </trustInfo>
+<!-- See https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page -->
+<application>
+  <windowsSettings>
+    <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+  </windowsSettings>
+</application>
 <!-- NETBEANS-1227: Indicate the same HiDPI capabilities as javaw.exe from JDK 11. -->
 <asmv3:application xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
   <asmv3:windowsSettings xmlns:dpi1="http://schemas.microsoft.com/SMI/2005/WindowsSettings" xmlns:dpi2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">