diff --git a/src/runtime/CustomMarshaler.cs b/src/runtime/CustomMarshaler.cs
new file mode 100644
index 000000000..475fa05eb
--- /dev/null
+++ b/src/runtime/CustomMarshaler.cs
@@ -0,0 +1,171 @@
+using System;
+using System.Linq;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace Python.Runtime
+{
+    /// <summary>
+    /// Common base for the custom marshalers defined in this file. It
+    /// implements every <see cref="ICustomMarshaler"/> member except the
+    /// managed-to-native conversion, which each subclass must provide.
+    /// </summary>
+    public abstract class MarshalerBase : ICustomMarshaler
+    {
+        /// <summary>
+        /// Converts <paramref name="managedObj"/> into its native form.
+        /// </summary>
+        /// <param name="managedObj">Managed value to convert.</param>
+        /// <returns>
+        /// Pointer to the native data. <see cref="CleanUpNativeData"/> frees
+        /// its argument with Marshal.FreeHGlobal, and the subclasses in this
+        /// file allocate the returned buffer with Marshal.AllocHGlobal.
+        /// </returns>
+        public abstract IntPtr MarshalManagedToNative(object managedObj);
+
+        /// <summary>
+        /// Native-to-managed conversion is not provided by this base class.
+        /// </summary>
+        /// <param name="pNativeData">Ignored.</param>
+        /// <returns>Never returns normally.</returns>
+        /// <exception cref="NotImplementedException">Always thrown.</exception>
+        public object MarshalNativeToManaged(IntPtr pNativeData)
+        {
+            throw new NotImplementedException();
+        }
+
+        /// <summary>
+        /// Reports the native data size as the size of one pointer.
+        /// </summary>
+        /// <returns><see cref="IntPtr.Size"/>.</returns>
+        public int GetNativeDataSize()
+        {
+            return IntPtr.Size;
+        }
+
+        /// <summary>
+        /// Frees a native buffer with Marshal.FreeHGlobal.
+        /// </summary>
+        /// <param name="pNativeData">Pointer to the buffer to free.</param>
+        public void CleanUpNativeData(IntPtr pNativeData)
+        {
+            Marshal.FreeHGlobal(pNativeData);
+        }
+
+        /// <summary>
+        /// Intentionally does nothing for the managed object.
+        /// </summary>
+        /// <param name="managedObj">Ignored.</param>
+        public void CleanUpManagedData(object managedObj)
+        {
+            // No explicit cleanup: managed objects are left to the garbage collector.
+        }
+    }
+
+
+    /// <summary>
+    /// Custom marshaler that hands a managed string to native code encoded
+    /// with <c>Runtime.PyEncoding</c>, covering the UCS2/UCS4 difference in
+    /// the native character width.
+    /// </summary>
+    public class StrMarshaler : MarshalerBase
+    {
+        private static readonly MarshalerBase Instance = new StrMarshaler();
+        private static readonly Encoding PyEncoding = Runtime.PyEncoding;
+
+        /// <summary>
+        /// Returns the single shared marshaler instance.
+        /// </summary>
+        /// <param name="cookie">Ignored.</param>
+        /// <returns>The shared <see cref="StrMarshaler"/>.</returns>
+        public static ICustomMarshaler GetInstance(string cookie)
+        {
+            return Instance;
+        }
+
+        /// <summary>
+        /// Encodes a string, followed by a terminating NUL character, into a
+        /// freshly allocated unmanaged buffer.
+        /// </summary>
+        /// <param name="managedObj">The string to marshal.</param>
+        /// <returns>
+        /// Pointer to the buffer, or <see cref="IntPtr.Zero"/> when
+        /// <paramref name="managedObj"/> is null or not a string.
+        /// </returns>
+        public override IntPtr MarshalManagedToNative(object managedObj)
+        {
+            if (!(managedObj is string))
+            {
+                // Covers both null and non-string arguments.
+                return IntPtr.Zero;
+            }
+
+            var text = (string)managedObj;
+            byte[] encoded = PyEncoding.GetBytes(string.Concat(text, "\0"));
+            int byteCount = encoded.Length;
+
+            IntPtr buffer = Marshal.AllocHGlobal(byteCount);
+            try
+            {
+                Marshal.Copy(encoded, 0, buffer, byteCount);
+                return buffer;
+            }
+            catch (Exception)
+            {
+                // Do not leak the allocation if the copy fails.
+                Marshal.FreeHGlobal(buffer);
+                throw;
+            }
+        }
+    }
+
+
+    /// <summary>
+    /// Custom marshaler that hands a managed string array to native code as
+    /// an array of pointers to NUL-terminated strings encoded with
+    /// <c>Runtime.PyEncoding</c> (UCS2/UCS4 aware).
+    /// </summary>
+    public class StrArrayMarshaler : MarshalerBase
+    {
+        private static readonly MarshalerBase Instance = new StrArrayMarshaler();
+        private static readonly Encoding PyEncoding = Runtime.PyEncoding;
+
+        /// <summary>
+        /// Returns the single shared marshaler instance.
+        /// </summary>
+        /// <param name="cookie">Ignored.</param>
+        /// <returns>The shared <see cref="StrArrayMarshaler"/>.</returns>
+        public static ICustomMarshaler GetInstance(string cookie)
+        {
+            return Instance;
+        }
+
+        /// <summary>
+        /// Builds one unmanaged block laid out as a table of pointers (one
+        /// per string) immediately followed by the encoded strings.
+        /// </summary>
+        /// <param name="managedObj">The <c>string[]</c> to marshal.</param>
+        /// <returns>
+        /// Pointer to the start of the pointer table, or
+        /// <see cref="IntPtr.Zero"/> when <paramref name="managedObj"/> is
+        /// null or not a <c>string[]</c>.
+        /// </returns>
+        public override IntPtr MarshalManagedToNative(object managedObj)
+        {
+            var args = managedObj as string[];
+            if (args == null)
+            {
+                return IntPtr.Zero;
+            }
+
+            // Character count of every string plus one terminator each.
+            // Accumulated with overflow checking, as Enumerable.Sum does.
+            var charCount = 0;
+            foreach (string arg in args)
+            {
+                int withTerminator = arg.Length + 1;
+                charCount = checked(charCount + withTerminator);
+            }
+
+            int tableBytes = args.Length * IntPtr.Size;
+            int blockBytes = tableBytes + charCount * Runtime.UCS;
+
+            IntPtr block = Marshal.AllocHGlobal(blockBytes);
+            try
+            {
+                IntPtr slot = block;              // next pointer-table entry
+                IntPtr dest = block + tableBytes; // next string destination
+                foreach (string arg in args)
+                {
+                    byte[] encoded = PyEncoding.GetBytes(arg + "\0");
+                    Marshal.Copy(encoded, 0, dest, encoded.Length);
+                    Marshal.WriteIntPtr(slot, dest);
+
+                    slot += IntPtr.Size;
+                    dest += encoded.Length;
+                }
+
+                return block;
+            }
+            catch (Exception)
+            {
+                // Do not leak the allocation if encoding or copying fails.
+                Marshal.FreeHGlobal(block);
+                throw;
+            }
+        }
+    }
+
+
+    /// <summary>
+    /// Custom marshaler that hands a managed string to native code as
+    /// UTF-8. Use on functions that expect UTF-8 encoded strings, like
+    /// `PyUnicode_FromStringAndSize`.
+    /// </summary>
+    /// <remarks>
+    /// If instead we used `MarshalAs(UnmanagedType.LPWStr)` the output to
+    /// `foo` would be `f\x00o\x00o\x00`.
+    /// </remarks>
+    public class Utf8Marshaler : MarshalerBase
+    {
+        private static readonly MarshalerBase Instance = new Utf8Marshaler();
+        private static readonly Encoding PyEncoding = Encoding.UTF8;
+
+        /// <summary>
+        /// Returns the single shared marshaler instance.
+        /// </summary>
+        /// <param name="cookie">Ignored.</param>
+        /// <returns>The shared <see cref="Utf8Marshaler"/>.</returns>
+        public static ICustomMarshaler GetInstance(string cookie)
+        {
+            return Instance;
+        }
+
+        /// <summary>
+        /// Encodes a string, followed by a terminating NUL, as UTF-8 into a
+        /// freshly allocated unmanaged buffer.
+        /// </summary>
+        /// <param name="managedObj">The string to marshal.</param>
+        /// <returns>
+        /// Pointer to the buffer, or <see cref="IntPtr.Zero"/> when
+        /// <paramref name="managedObj"/> is null or not a string.
+        /// </returns>
+        public override IntPtr MarshalManagedToNative(object managedObj)
+        {
+            var text = managedObj as string;
+            if (text == null)
+            {
+                return IntPtr.Zero;
+            }
+
+            byte[] utf8 = PyEncoding.GetBytes(text + "\0");
+            IntPtr buffer = Marshal.AllocHGlobal(utf8.Length);
+            try
+            {
+                Marshal.Copy(utf8, 0, buffer, utf8.Length);
+                return buffer;
+            }
+            catch (Exception)
+            {
+                // Do not leak the allocation if the copy fails.
+                Marshal.FreeHGlobal(buffer);
+                throw;
+            }
+        }
+    }
+}
diff --git a/src/runtime/Python.Runtime.csproj b/src/runtime/Python.Runtime.csproj
index 3dbad5f75..8580b7f61 100644
--- a/src/runtime/Python.Runtime.csproj
+++ b/src/runtime/Python.Runtime.csproj
@@ -89,6 +89,7 @@
+
diff --git a/src/runtime/converter.cs b/src/runtime/converter.cs
index aeaf2d871..8b58e5e9a 100644
--- a/src/runtime/converter.cs
+++ b/src/runtime/converter.cs
@@ -599,21 +599,10 @@ private static bool ToPrimitive(IntPtr value, Type obType, out object result, bo
{
if (Runtime.PyUnicode_GetSize(value) == 1)
{
- op = Runtime.PyUnicode_AS_UNICODE(value);
- if (Runtime.UCS == 2) // Don't trust linter, statement not always true.
- {
- // 2011-01-02: Marshal as character array because the cast
- // result = (char)Marshal.ReadInt16(op); throws an OverflowException
- // on negative numbers with Check Overflow option set on the project
- Char[] buff = new Char[1];
- Marshal.Copy(op, buff, 0, 1);
- result = buff[0];
- }
- else // UCS4
- {
- // XXX this is probably NOT correct?
- result = (char)Marshal.ReadInt32(op);
- }
+ op = Runtime.PyUnicode_AsUnicode(value);
+ Char[] buff = new Char[1];
+ Marshal.Copy(op, buff, 0, 1);
+ result = buff[0];
return true;
}
goto type_error;
diff --git a/src/runtime/debughelper.cs b/src/runtime/debughelper.cs
index 777a61e35..2a91a74b4 100644
--- a/src/runtime/debughelper.cs
+++ b/src/runtime/debughelper.cs
@@ -115,5 +115,27 @@ internal static void debug(string msg)
Console.WriteLine("thread {0} : {1}", tid, caller);
Console.WriteLine(" {0}", msg);
}
+
+        /// <summary>
+        /// Debug helper that prints a byte array to the console as
+        /// space-separated two-digit uppercase hex values on one line.
+        /// Handy for comparing managed-to-native conversions, for example
+        /// when debugging a CustomMarshaler.
+        /// </summary>
+        /// <param name="bytes">
+        /// Bytes to print; null or empty prints an empty line.
+        /// </param>
+        [Conditional("DEBUG")]
+        public static void PrintHexBytes(byte[] bytes)
+        {
+            if (bytes == null || bytes.Length == 0)
+            {
+                Console.WriteLine("");
+                return;
+            }
+
+            for (var i = 0; i < bytes.Length; i++)
+            {
+                Console.Write("{0:X2} ", bytes[i]);
+            }
+            Console.WriteLine();
+        }
}
}
diff --git a/src/runtime/runtime.cs b/src/runtime/runtime.cs
index 6398d3345..337b82fbf 100644
--- a/src/runtime/runtime.cs
+++ b/src/runtime/runtime.cs
@@ -170,6 +170,11 @@ public class Runtime
internal static bool IsPython2;
internal static bool IsPython3;
+ /// <summary>
+ /// Encoding used to convert strings between managed and native form: Encoding.Unicode when UCS is 2, otherwise Encoding.UTF32.
+ /// </summary>
+ internal static readonly Encoding PyEncoding = UCS == 2 ? Encoding.Unicode : Encoding.UTF32;
+
///
/// Initialize the runtime...
///
@@ -667,41 +672,8 @@ internal unsafe static extern IntPtr
public unsafe static extern int
Py_Main(
int argc,
- IntPtr lplpargv
+ [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(StrArrayMarshaler))] string[] argv
);
-
- public static int Py_Main(int argc, string[] argv)
- {
- // Totally ignoring argc.
- argc = argv.Length;
-
- var allStringsLength = 0;
- foreach (string x in argv)
- {
- allStringsLength += x.Length + 1;
- }
- int requiredSize = IntPtr.Size * argc + allStringsLength * UCS;
- IntPtr mem = Marshal.AllocHGlobal(requiredSize);
- try
- {
- // Preparing array of pointers to UTF32 strings.
- IntPtr curStrPtr = mem + argc * IntPtr.Size;
- for (var i = 0; i < argv.Length; i++)
- {
- // Unicode or UTF8 work
- Encoding enc = UCS == 2 ? Encoding.Unicode : Encoding.UTF32;
- byte[] zstr = enc.GetBytes(argv[i] + "\0");
- Marshal.Copy(zstr, 0, curStrPtr, zstr.Length);
- Marshal.WriteIntPtr(mem + IntPtr.Size * i, curStrPtr);
- curStrPtr += zstr.Length;
- }
- return Py_Main(argc, mem);
- }
- finally
- {
- Marshal.FreeHGlobal(mem);
- }
- }
#elif PYTHON2
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
ExactSpelling = true, CharSet = CharSet.Ansi)]
@@ -1541,23 +1513,12 @@ internal static IntPtr PyBytes_AS_STRING(IntPtr ob)
return ob + BytesOffset.ob_sval;
}
- internal static IntPtr PyString_FromStringAndSize(string value, int length)
- {
- // copy the string into an unmanaged UTF-8 buffer
- int len = Encoding.UTF8.GetByteCount(value);
- byte[] buffer = new byte[len + 1];
- Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, 0);
- IntPtr nativeUtf8 = Marshal.AllocHGlobal(buffer.Length);
- try
- {
- Marshal.Copy(buffer, 0, nativeUtf8, buffer.Length);
- return PyUnicode_FromStringAndSize(nativeUtf8, length);
- }
- finally
- {
- Marshal.FreeHGlobal(nativeUtf8);
- }
- }
+        /// <summary>
+        /// Binding for the native <c>PyUnicode_FromStringAndSize</c> entry
+        /// point. <paramref name="value"/> is passed as a NUL-terminated
+        /// UTF-8 buffer produced by <see cref="Utf8Marshaler"/>.
+        /// </summary>
+        /// <param name="value">Managed string to hand to the native call.</param>
+        /// <param name="size">
+        /// Forwarded unchanged as the native size argument.
+        /// NOTE(review): the CPython API describes this as a byte count of
+        /// the UTF-8 buffer; confirm callers do not pass a UTF-16 character
+        /// count for non-ASCII text.
+        /// </param>
+        /// <returns>The pointer returned by the native function.</returns>
+        // The calling convention is stated explicitly, like the other imports
+        // in this file: the DllImport default is Winapi (stdcall on 32-bit
+        // Windows), which does not match the cdecl Python C API.
+        [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
+            EntryPoint = "PyUnicode_FromStringAndSize",
+            ExactSpelling = true)]
+        internal static extern IntPtr
+        PyString_FromStringAndSize(
+            [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(Utf8Marshaler))] string value,
+            int size
+        );
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
ExactSpelling = true, CharSet = CharSet.Unicode)]
@@ -1616,14 +1577,8 @@ internal unsafe static extern int
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
ExactSpelling = true, CharSet = CharSet.Unicode)]
- internal unsafe static extern char*
- PyUnicode_AsUnicode(IntPtr ob);
-
- [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
- EntryPoint = "PyUnicode_AsUnicode",
- ExactSpelling = true, CharSet = CharSet.Unicode)]
internal unsafe static extern IntPtr
- PyUnicode_AS_UNICODE(IntPtr op);
+ PyUnicode_AsUnicode(IntPtr ob);
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
ExactSpelling = true, CharSet = CharSet.Unicode)]
@@ -1657,14 +1612,8 @@ internal unsafe static extern int
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
EntryPoint = "PyUnicodeUCS2_AsUnicode",
ExactSpelling = true)]
- internal unsafe static extern char*
- PyUnicode_AsUnicode(IntPtr ob);
-
- [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
- EntryPoint = "PyUnicodeUCS2_AsUnicode",
- ExactSpelling = true, CharSet = CharSet.Unicode)]
internal unsafe static extern IntPtr
- PyUnicode_AS_UNICODE(IntPtr op);
+ PyUnicode_AsUnicode(IntPtr ob);
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
EntryPoint = "PyUnicodeUCS2_FromOrdinal",
@@ -1688,34 +1637,10 @@ internal unsafe static extern IntPtr
internal unsafe static extern IntPtr
PyUnicode_FromKindAndString(
int kind,
- IntPtr s,
+ [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(StrMarshaler))] string s,
int size
);
- internal static unsafe IntPtr PyUnicode_FromKindAndString(
- int kind,
- string s,
- int size)
- {
- var bufLength = Math.Max(s.Length, size) * 4;
-
- IntPtr mem = Marshal.AllocHGlobal(bufLength);
- try
- {
- fixed (char* ps = s)
- {
- Encoding.UTF32.GetBytes(ps, s.Length, (byte*)mem, bufLength);
- }
-
- var result = PyUnicode_FromKindAndString(kind, mem, size);
- return result;
- }
- finally
- {
- Marshal.FreeHGlobal(mem);
- }
- }
-
internal static IntPtr PyUnicode_FromUnicode(string s, int size)
{
return PyUnicode_FromKindAndString(4, s, size);
@@ -1731,12 +1656,6 @@ internal unsafe static extern int
internal unsafe static extern IntPtr
PyUnicode_AsUnicode(IntPtr ob);
- [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
- EntryPoint = "PyUnicode_AsUnicode",
- ExactSpelling = true, CharSet = CharSet.Unicode)]
- internal unsafe static extern IntPtr
- PyUnicode_AS_UNICODE(IntPtr op);
-
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
ExactSpelling = true, CharSet = CharSet.Unicode)]
internal unsafe static extern IntPtr
@@ -1758,28 +1677,10 @@ internal unsafe static extern IntPtr
EntryPoint = "PyUnicodeUCS4_FromUnicode",
ExactSpelling = true)]
internal unsafe static extern IntPtr
- PyUnicode_FromUnicode(IntPtr s, int size);
-
- internal static unsafe IntPtr PyUnicode_FromUnicode(string s, int size)
- {
- var bufLength = Math.Max(s.Length, size) * 4;
-
- IntPtr mem = Marshal.AllocHGlobal(bufLength);
- try
- {
- fixed (char* ps = s)
- {
- Encoding.UTF32.GetBytes(ps, s.Length, (byte*)mem, bufLength);
- }
-
- var result = PyUnicode_FromUnicode(mem, size);
- return result;
- }
- finally
- {
- Marshal.FreeHGlobal(mem);
- }
- }
+ PyUnicode_FromUnicode(
+ [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(StrMarshaler))] string s,
+ int size
+ );
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
EntryPoint = "PyUnicodeUCS4_GetSize",
@@ -1793,12 +1694,6 @@ internal unsafe static extern int
internal unsafe static extern IntPtr
PyUnicode_AsUnicode(IntPtr ob);
- [DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
- EntryPoint = "PyUnicodeUCS4_AsUnicode",
- ExactSpelling = true, CharSet = CharSet.Unicode)]
- internal unsafe static extern IntPtr
- PyUnicode_AS_UNICODE(IntPtr op);
-
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
EntryPoint = "PyUnicodeUCS4_FromOrdinal",
ExactSpelling = true, CharSet = CharSet.Unicode)]
@@ -1811,7 +1706,20 @@ internal static IntPtr PyUnicode_FromString(string s)
return PyUnicode_FromUnicode(s, (s.Length));
}
- internal unsafe static string GetManagedString(IntPtr op)
+ /// <summary>
+ /// Function to access the internal PyUnicode/PyString object and
+ /// convert it to a managed string with the correct encoding.
+ /// </summary>
+ /// <remarks>
+ /// We can't easily do this through the CustomMarshalers on the
+ /// returns because they have access to the IntPtr but not the size.
+ /// <para />
+ /// For PyUnicodeType, we can't convert with Marshal.PtrToStringUni
+ /// since it only works for UCS2.
+ /// </remarks>
+ /// <param name="op">PyStringType or PyUnicodeType object to convert</param>
+ /// <returns>Managed String</returns>
+ internal static string GetManagedString(IntPtr op)
{
IntPtr type = PyObject_TYPE(op);
@@ -1827,18 +1735,13 @@ internal unsafe static string GetManagedString(IntPtr op)
if (type == Runtime.PyUnicodeType)
{
-#if UCS4
- IntPtr p = Runtime.PyUnicode_AsUnicode(op);
- int length = Runtime.PyUnicode_GetSize(op);
- int size = length * 4;
- byte[] buffer = new byte[size];
+ IntPtr p = PyUnicode_AsUnicode(op);
+ int length = PyUnicode_GetSize(op);
+
+ int size = length * UCS;
+ var buffer = new byte[size];
Marshal.Copy(p, buffer, 0, size);
- return Encoding.UTF32.GetString(buffer, 0, size);
-#elif UCS2
- char* p = Runtime.PyUnicode_AsUnicode(op);
- int size = Runtime.PyUnicode_GetSize(op);
- return new String(p, 0, size);
-#endif
+ return PyEncoding.GetString(buffer, 0, size);
}
return null;
@@ -2120,41 +2023,9 @@ internal unsafe static extern IntPtr
internal unsafe static extern void
PySys_SetArgvEx(
int argc,
- IntPtr lplpargv,
+ [MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(StrArrayMarshaler))] string[] argv,
int updatepath
);
-
- internal static void PySys_SetArgvEx(int argc, string[] argv, int updatepath)
- {
- // Totally ignoring argc.
- argc = argv.Length;
-
- var allStringsLength = 0;
- foreach (string x in argv)
- {
- allStringsLength += x.Length + 1;
- }
- int requiredSize = IntPtr.Size * argc + allStringsLength * UCS;
- IntPtr mem = Marshal.AllocHGlobal(requiredSize);
- try
- {
- // Preparing array of pointers to UTF32 strings.
- IntPtr curStrPtr = mem + argc * IntPtr.Size;
- for (var i = 0; i < argv.Length; i++)
- {
- Encoding enc = UCS == 2 ? Encoding.Unicode : Encoding.UTF32;
- byte[] zstr = enc.GetBytes(argv[i] + "\0");
- Marshal.Copy(zstr, 0, curStrPtr, zstr.Length);
- Marshal.WriteIntPtr(mem + IntPtr.Size * i, curStrPtr);
- curStrPtr += zstr.Length;
- }
- PySys_SetArgvEx(argc, mem, updatepath);
- }
- finally
- {
- Marshal.FreeHGlobal(mem);
- }
- }
#elif PYTHON2
[DllImport(Runtime.dll, CallingConvention = CallingConvention.Cdecl,
ExactSpelling = true, CharSet = CharSet.Ansi)]