diff --git a/src/runtime/CustomMarshaler.cs b/src/runtime/CustomMarshaler.cs new file mode 100644 index 000000000..475fa05eb --- /dev/null +++ b/src/runtime/CustomMarshaler.cs @@ -0,0 +1,171 @@ +using System; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text; + +namespace Python.Runtime +{ + /// + /// Abstract class defining boiler plate methods that + /// Custom Marshalers will use. + /// + public abstract class MarshalerBase : ICustomMarshaler + { + public object MarshalNativeToManaged(IntPtr pNativeData) + { + throw new NotImplementedException(); + } + + public abstract IntPtr MarshalManagedToNative(object managedObj); + + public void CleanUpNativeData(IntPtr pNativeData) + { + Marshal.FreeHGlobal(pNativeData); + } + + public void CleanUpManagedData(object managedObj) + { + // Let GC deal with it + } + + public int GetNativeDataSize() + { + return IntPtr.Size; + } + } + + + /// + /// Custom Marshaler to deal with Managed String to Native + /// conversion differences on UCS2/UCS4. + /// + public class StrMarshaler : MarshalerBase + { + private static readonly MarshalerBase Instance = new StrMarshaler(); + private static readonly Encoding PyEncoding = Runtime.PyEncoding; + + public override IntPtr MarshalManagedToNative(object managedObj) + { + var s = managedObj as string; + + if (s == null) + { + return IntPtr.Zero; + } + + byte[] bStr = PyEncoding.GetBytes(s + "\0"); + IntPtr mem = Marshal.AllocHGlobal(bStr.Length); + try + { + Marshal.Copy(bStr, 0, mem, bStr.Length); + } + catch (Exception) + { + Marshal.FreeHGlobal(mem); + throw; + } + + return mem; + } + + public static ICustomMarshaler GetInstance(string cookie) + { + return Instance; + } + } + + + /// + /// Custom Marshaler to deal with Managed String Arrays to Native + /// conversion differences on UCS2/UCS4. + /// + public class StrArrayMarshaler : MarshalerBase + { + private static readonly MarshalerBase Instance = new StrArrayMarshaler(); + private static readonly Encoding PyEncoding = Runtime.PyEncoding; + + public override IntPtr MarshalManagedToNative(object managedObj) + { + var argv = managedObj as string[]; + + if (argv == null) + { + return IntPtr.Zero; + } + + int totalStrLength = argv.Sum(arg => arg.Length + 1); + int memSize = argv.Length * IntPtr.Size + totalStrLength * Runtime.UCS; + + IntPtr mem = Marshal.AllocHGlobal(memSize); + try + { + // Preparing array of pointers to strings + IntPtr curStrPtr = mem + argv.Length * IntPtr.Size; + for (var i = 0; i < argv.Length; i++) + { + byte[] bStr = PyEncoding.GetBytes(argv[i] + "\0"); + Marshal.Copy(bStr, 0, curStrPtr, bStr.Length); + Marshal.WriteIntPtr(mem + i * IntPtr.Size, curStrPtr); + curStrPtr += bStr.Length; + } + } + catch (Exception) + { + Marshal.FreeHGlobal(mem); + throw; + } + + return mem; + } + + public static ICustomMarshaler GetInstance(string cookie) + { + return Instance; + } + } + + + /// + /// Custom Marshaler to deal with Managed String to Native + /// conversion on UTF-8. Use on functions that expect UTF-8 encoded + /// strings like `PyUnicode_FromStringAndSize` + /// + /// + /// If instead we used `MarshalAs(UnmanagedType.LPWStr)` the output to + /// `foo` would be `f\x00o\x00o\x00`. + /// + public class Utf8Marshaler : MarshalerBase + { + private static readonly MarshalerBase Instance = new Utf8Marshaler(); + private static readonly Encoding PyEncoding = Encoding.UTF8; + + public override IntPtr MarshalManagedToNative(object managedObj) + { + var s = managedObj as string; + + if (s == null) + { + return IntPtr.Zero; + } + + byte[] bStr = PyEncoding.GetBytes(s + "\0"); + IntPtr mem = Marshal.AllocHGlobal(bStr.Length); + try + { + Marshal.Copy(bStr, 0, mem, bStr.Length); + } + catch (Exception) + { + Marshal.FreeHGlobal(mem); + throw; + } + + return mem; + } + + public static ICustomMarshaler GetInstance(string cookie) + { + return Instance; + } + } +} diff --git a/src/runtime/Python.Runtime.csproj b/src/runtime/Python.Runtime.csproj index 3dbad5f75..8580b7f61 100644 --- a/src/runtime/Python.Runtime.csproj +++ b/src/runtime/Python.Runtime.csproj @@ -89,6 +89,7 @@ + diff --git a/src/runtime/converter.cs b/src/runtime/converter.cs index aeaf2d871..8b58e5e9a 100644 --- a/src/runtime/converter.cs +++ b/src/runtime/converter.cs @@ -599,21 +599,10 @@ private static bool ToPrimitive(IntPtr value, Type obType, out object result, bo { if (Runtime.PyUnicode_GetSize(value) == 1) { - op = Runtime.PyUnicode_AS_UNICODE(value); - if (Runtime.UCS == 2) // Don't trust linter, statement not always true. - { - // 2011-01-02: Marshal as character array because the cast - // result = (char)Marshal.ReadInt16(op); throws an OverflowException - // on negative numbers with Check Overflow option set on the project - Char[] buff = new Char[1]; - Marshal.Copy(op, buff, 0, 1); - result = buff[0]; - } - else // UCS4 - { - // XXX this is probably NOT correct? - result = (char)Marshal.ReadInt32(op); - } + op = Runtime.PyUnicode_AsUnicode(value); + Char[] buff = new Char[1]; + Marshal.Copy(op, buff, 0, 1); + result = buff[0]; return true; } goto type_error; diff --git a/src/runtime/debughelper.cs b/src/runtime/debughelper.cs index 777a61e35..2a91a74b4 100644 --- a/src/runtime/debughelper.cs +++ b/src/runtime/debughelper.cs @@ -115,5 +115,27 @@ internal static void debug(string msg) Console.WriteLine("thread {0} : {1}", tid, caller); Console.WriteLine(" {0}", msg); } + + /// + /// Helper function to inspect/compare managed to native conversions. + /// Especially useful when debugging CustomMarshaler. + /// + /// + [Conditional("DEBUG")] + public static void PrintHexBytes(byte[] bytes) + { + if ((bytes == null) || (bytes.Length == 0)) + { + Console.WriteLine(""); + } + else + { + foreach (byte t in bytes) + { + Console.Write("{0:X2} ", t); + } + Console.WriteLine(); + } + } } } diff --git a/src/runtime/runtime.cs b/src/runtime/runtime.cs index 6398d3345..337b82fbf 100644 --- a/src/runtime/runtime.cs +++ b/src/runtime/runtime.cs @@ -170,6 +170,11 @@ public class Runtime internal static bool IsPython2; internal static bool IsPython3; + /// + /// Encoding to use to convert Unicode to/from Managed to Native + /// + internal static readonly Encoding PyEncoding = UCS == 2 ? Encoding.Unicode : Encoding.UTF32; + /// /// Initialize the runtime... /// @@ -667,41 +672,8 @@ internal unsafe static extern IntPtr public unsafe static extern int Py_Main( int argc, - IntPtr lplpargv + [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(StrArrayMarshaler))] string[] argv ); - - public static int Py_Main(int argc, string[] argv) - { - // Totally ignoring argc. - argc = argv.Length; - - var allStringsLength = 0; - foreach (string x in argv) - { - allStringsLength += x.Length + 1; - } - int requiredSize = IntPtr.Size * argc + allStringsLength * UCS; - IntPtr mem = Marshal.AllocHGlobal(requiredSize); - try - { - // Preparing array of pointers to UTF32 strings. - IntPtr curStrPtr = mem + argc * IntPtr.Size; - for (var i = 0; i < argv.Length; i++) - { - // Unicode or UTF8 work - Encoding enc = UCS == 2 ? Encoding.Unicode : Encoding.UTF32; - byte[] zstr = enc.GetBytes(argv[i] + "\0"); - Marshal.Copy(zstr, 0, curStrPtr, zstr.Length); - Marshal.WriteIntPtr(mem + IntPtr.Size * i, curStrPtr); - curStrPtr += zstr.Length; - } - return Py_Main(argc, mem); - } - finally - { - Marshal.FreeHGlobal(mem); - } - } #elif PYTHON2 [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, CharSet = CharSet.Ansi)] @@ -1541,23 +1513,12 @@ internal static IntPtr PyBytes_AS_STRING(IntPtr ob) return ob + BytesOffset.ob_sval; } - internal static IntPtr PyString_FromStringAndSize(string value, int length) - { - // copy the string into an unmanaged UTF-8 buffer - int len = Encoding.UTF8.GetByteCount(value); - byte[] buffer = new byte[len + 1]; - Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0); - IntPtr nativeUtf8 = Marshal.AllocHGlobal(buffer.Length); - try - { - Marshal.Copy(buffer, 0, nativeUtf8, buffer.Length); - return PyUnicode_FromStringAndSize(nativeUtf8, length); - } - finally - { - Marshal.FreeHGlobal(nativeUtf8); - } - } + [DllImport(Runtime.dll, EntryPoint = "PyUnicode_FromStringAndSize")] + internal static extern IntPtr + PyString_FromStringAndSize( + [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Utf8Marshaler))] string value, + int size + ); [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, CharSet = CharSet.Unicode)] @@ -1616,14 +1577,8 @@ internal unsafe static extern int [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, CharSet = CharSet.Unicode)] - internal unsafe static extern char* - PyUnicode_AsUnicode(IntPtr ob); - - [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, - EntryPoint = "PyUnicode_AsUnicode", - ExactSpelling = true, CharSet = CharSet.Unicode)] internal unsafe static extern IntPtr - PyUnicode_AS_UNICODE(IntPtr op); + PyUnicode_AsUnicode(IntPtr ob); [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, CharSet = CharSet.Unicode)] @@ -1657,14 +1612,8 @@ internal unsafe static extern int [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, EntryPoint = "PyUnicodeUCS2_AsUnicode", ExactSpelling = true)] - internal unsafe static extern char* - PyUnicode_AsUnicode(IntPtr ob); - - [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, - EntryPoint = "PyUnicodeUCS2_AsUnicode", - ExactSpelling = true, CharSet = CharSet.Unicode)] internal unsafe static extern IntPtr - PyUnicode_AS_UNICODE(IntPtr op); + PyUnicode_AsUnicode(IntPtr ob); [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, EntryPoint = "PyUnicodeUCS2_FromOrdinal", @@ -1688,34 +1637,10 @@ internal unsafe static extern IntPtr internal unsafe static extern IntPtr PyUnicode_FromKindAndString( int kind, - IntPtr s, + [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(StrMarshaler))] string s, int size ); - internal static unsafe IntPtr PyUnicode_FromKindAndString( - int kind, - string s, - int size) - { - var bufLength = Math.Max(s.Length, size) * 4; - - IntPtr mem = Marshal.AllocHGlobal(bufLength); - try - { - fixed (char* ps = s) - { - Encoding.UTF32.GetBytes(ps, s.Length, (byte*)mem, bufLength); - } - - var result = PyUnicode_FromKindAndString(kind, mem, size); - return result; - } - finally - { - Marshal.FreeHGlobal(mem); - } - } - internal static IntPtr PyUnicode_FromUnicode(string s, int size) { return PyUnicode_FromKindAndString(4, s, size); @@ -1731,12 +1656,6 @@ internal unsafe static extern int internal unsafe static extern IntPtr PyUnicode_AsUnicode(IntPtr ob); - [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, - EntryPoint = "PyUnicode_AsUnicode", - ExactSpelling = true, CharSet = CharSet.Unicode)] - internal unsafe static extern IntPtr - PyUnicode_AS_UNICODE(IntPtr op); - [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, CharSet = CharSet.Unicode)] internal unsafe static extern IntPtr @@ -1758,28 +1677,10 @@ internal unsafe static extern IntPtr EntryPoint = "PyUnicodeUCS4_FromUnicode", ExactSpelling = true)] internal unsafe static extern IntPtr - PyUnicode_FromUnicode(IntPtr s, int size); - - internal static unsafe IntPtr PyUnicode_FromUnicode(string s, int size) - { - var bufLength = Math.Max(s.Length, size) * 4; - - IntPtr mem = Marshal.AllocHGlobal(bufLength); - try - { - fixed (char* ps = s) - { - Encoding.UTF32.GetBytes(ps, s.Length, (byte*)mem, bufLength); - } - - var result = PyUnicode_FromUnicode(mem, size); - return result; - } - finally - { - Marshal.FreeHGlobal(mem); - } - } + PyUnicode_FromUnicode( + [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(StrMarshaler))] string s, + int size + ); [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, EntryPoint = "PyUnicodeUCS4_GetSize", @@ -1793,12 +1694,6 @@ internal unsafe static extern int internal unsafe static extern IntPtr PyUnicode_AsUnicode(IntPtr ob); - [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, - EntryPoint = "PyUnicodeUCS4_AsUnicode", - ExactSpelling = true, CharSet = CharSet.Unicode)] - internal unsafe static extern IntPtr - PyUnicode_AS_UNICODE(IntPtr op); - [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, EntryPoint = "PyUnicodeUCS4_FromOrdinal", ExactSpelling = true, CharSet = CharSet.Unicode)] @@ -1811,7 +1706,20 @@ internal static IntPtr PyUnicode_FromString(string s) return PyUnicode_FromUnicode(s, (s.Length)); } - internal unsafe static string GetManagedString(IntPtr op) + /// + /// Function to access the internal PyUnicode/PyString object and + /// convert it to a managed string with the correct encoding. + /// + /// + /// We can't easily do this through through the CustomMarshaler's on + /// the returns because will have access to the IntPtr but not size. + /// + /// For PyUnicodeType, we can't convert with Marshal.PtrToStringUni + /// since it only works for UCS2. + /// + /// PyStringType or PyUnicodeType object to convert + /// Managed String + internal static string GetManagedString(IntPtr op) { IntPtr type = PyObject_TYPE(op); @@ -1827,18 +1735,13 @@ internal unsafe static string GetManagedString(IntPtr op) if (type == Runtime.PyUnicodeType) { -#if UCS4 - IntPtr p = Runtime.PyUnicode_AsUnicode(op); - int length = Runtime.PyUnicode_GetSize(op); - int size = length * 4; - byte[] buffer = new byte[size]; + IntPtr p = PyUnicode_AsUnicode(op); + int length = PyUnicode_GetSize(op); + + int size = length * UCS; + var buffer = new byte[size]; Marshal.Copy(p, buffer, 0, size); - return Encoding.UTF32.GetString(buffer, 0, size); -#elif UCS2 - char* p = Runtime.PyUnicode_AsUnicode(op); - int size = Runtime.PyUnicode_GetSize(op); - return new String(p, 0, size); -#endif + return PyEncoding.GetString(buffer, 0, size); } return null; @@ -2120,41 +2023,9 @@ internal unsafe static extern IntPtr internal unsafe static extern void PySys_SetArgvEx( int argc, - IntPtr lplpargv, + [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(StrArrayMarshaler))] string[] argv, int updatepath ); - - internal static void PySys_SetArgvEx(int argc, string[] argv, int updatepath) - { - // Totally ignoring argc. - argc = argv.Length; - - var allStringsLength = 0; - foreach (string x in argv) - { - allStringsLength += x.Length + 1; - } - int requiredSize = IntPtr.Size * argc + allStringsLength * UCS; - IntPtr mem = Marshal.AllocHGlobal(requiredSize); - try - { - // Preparing array of pointers to UTF32 strings. - IntPtr curStrPtr = mem + argc * IntPtr.Size; - for (var i = 0; i < argv.Length; i++) - { - Encoding enc = UCS == 2 ? Encoding.Unicode : Encoding.UTF32; - byte[] zstr = enc.GetBytes(argv[i] + "\0"); - Marshal.Copy(zstr, 0, curStrPtr, zstr.Length); - Marshal.WriteIntPtr(mem + IntPtr.Size * i, curStrPtr); - curStrPtr += zstr.Length; - } - PySys_SetArgvEx(argc, mem, updatepath); - } - finally - { - Marshal.FreeHGlobal(mem); - } - } #elif PYTHON2 [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, CharSet = CharSet.Ansi)]