Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 12027ad

Browse files
authored
Merge pull request #1467 from pkese/master
Use exclusively PyUnicode_DecodeUTF16 for .NET->Python string conversion
2 parents 7d8f754 + 49ccc1e commit 12027ad

10 files changed

+42
-49
lines changed

AUTHORS.md

+1
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@
5555
- Meinrad Recheis ([@henon](https://github.com/henon))
5656
- Mohamed Koubaa ([@koubaa](https://github.com/koubaa))
5757
- Patrick Stewart ([@patstew](https://github.com/patstew))
58+
- Peter Kese ([@pkese](https://github.com/pkese))
5859
- Raphael Nestler ([@rnestler](https://github.com/rnestler))
5960
- Rickard Holmberg ([@rickardraysearch](https://github.com/rickardraysearch))
6061
- Sam Winstanley ([@swinstanley](https://github.com/swinstanley))

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ One must now either use enum members (e.g. `MyEnum.Option`), or use enum constru
7171
- Exception stacktraces on `PythonException.StackTrace` are now properly formatted
7272
- Providing an invalid type parameter to a generic type or method produces a helpful Python error
7373
- Empty parameter names (as can be generated from F#) do not cause crashes
74+
- Unicode strings with surrogates were truncated when converting from Python
7475

7576
### Removed
7677

src/embed_tests/TestCustomMarshal.cs

+1-1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ public static void GetManagedStringTwice()
2323
{
2424
const string expected = "FooBar";
2525

26-
IntPtr op = Runtime.Runtime.PyUnicode_FromString(expected);
26+
IntPtr op = Runtime.Runtime.PyString_FromString(expected);
2727
string s1 = Runtime.Runtime.GetManagedString(op);
2828
string s2 = Runtime.Runtime.GetManagedString(op);
2929

src/embed_tests/TestPyString.cs

+19
Original file line number | Diff line number | Diff line change
@@ -94,5 +94,24 @@ public void TestUnicode()
9494
PyObject actual = new PyString(expected);
9595
Assert.AreEqual(expected, actual.ToString());
9696
}
97+
98+
[Test]
99+
public void TestUnicodeSurrogateToString()
100+
{
101+
var expected = "foo\ud83d\udc3c";
102+
var actual = PythonEngine.Eval("'foo\ud83d\udc3c'");
103+
Assert.AreEqual(4, actual.Length());
104+
Assert.AreEqual(expected, actual.ToString());
105+
}
106+
107+
[Test]
108+
public void TestUnicodeSurrogate()
109+
{
110+
const string expected = "foo\ud83d\udc3c"; // "foo🐼"
111+
PyObject actual = new PyString(expected);
112+
// python treats "foo🐼" as 4 characters, dotnet as 5
113+
Assert.AreEqual(4, actual.Length());
114+
Assert.AreEqual(expected, actual.ToString());
115+
}
97116
}
98117
}

src/embed_tests/TestRuntime.cs

+1-1
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ public static void Py_IsInitializedValue()
3636
public static void RefCountTest()
3737
{
3838
Runtime.Runtime.Py_Initialize();
39-
IntPtr op = Runtime.Runtime.PyUnicode_FromString("FooBar");
39+
IntPtr op = Runtime.Runtime.PyString_FromString("FooBar");
4040

4141
// New object RefCount should be one
4242
Assert.AreEqual(1, Runtime.Runtime.Refcount(op));

src/runtime/converter.cs

+1-1
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ internal static IntPtr ToPython(object value, Type type)
221221
return CLRObject.GetInstHandle(value, type);
222222

223223
case TypeCode.String:
224-
return Runtime.PyUnicode_FromString((string)value);
224+
return Runtime.PyString_FromString((string)value);
225225

226226
case TypeCode.Int32:
227227
return Runtime.PyInt_FromInt32((int)value);

src/runtime/exceptions.cs

+3-3
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ internal static Exception ToException(BorrowedReference ob)
5050
{
5151
message = String.Format("{0}()", name);
5252
}
53-
return Runtime.PyUnicode_FromString(message);
53+
return Runtime.PyString_FromString(message);
5454
}
5555

5656
/// <summary>
@@ -75,7 +75,7 @@ internal static Exception ToException(BorrowedReference ob)
7575
{
7676
message = message.Substring(fullTypeName.Length);
7777
}
78-
return Runtime.PyUnicode_FromString(message);
78+
return Runtime.PyString_FromString(message);
7979
}
8080
}
8181

@@ -153,7 +153,7 @@ internal static void SetArgsAndCause(BorrowedReference ob, Exception e)
153153
if (!string.IsNullOrEmpty(e.Message))
154154
{
155155
args = Runtime.PyTuple_New(1);
156-
IntPtr msg = Runtime.PyUnicode_FromString(e.Message);
156+
IntPtr msg = Runtime.PyString_FromString(e.Message);
157157
Runtime.PyTuple_SetItem(args, 0, msg);
158158
}
159159
else

src/runtime/pystring.cs

+1-1
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ public PyString(PyObject o) : base(FromObject(o))
5151

5252
private static IntPtr FromString(string s)
5353
{
54-
IntPtr val = Runtime.PyUnicode_FromUnicode(s, s.Length);
54+
IntPtr val = Runtime.PyString_FromString(s);
5555
PythonException.ThrowIfIsNull(val);
5656
return val;
5757
}

src/runtime/runtime.cs

+13-41
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,7 @@ private static void InitPyMembers()
230230
() => PyStringType = IntPtr.Zero);
231231
XDecref(op);
232232

233-
op = PyUnicode_FromString("unicode");
233+
op = PyString_FromString("unicode");
234234
SetPyMemberTypeOf(ref PyUnicodeType, op,
235235
() => PyUnicodeType = IntPtr.Zero);
236236
XDecref(op);
@@ -1527,7 +1527,12 @@ internal static bool PyString_Check(IntPtr ob)
15271527
internal static IntPtr PyString_FromString(string value)
15281528
{
15291529
fixed(char* ptr = value)
1530-
return PyUnicode_FromKindAndData(2, (IntPtr)ptr, value.Length);
1530+
return Delegates.PyUnicode_DecodeUTF16(
1531+
(IntPtr)ptr,
1532+
value.Length * sizeof(Char),
1533+
IntPtr.Zero,
1534+
IntPtr.Zero
1535+
).DangerousMoveToPointerOrNull();
15311536
}
15321537

15331538

@@ -1553,16 +1558,6 @@ internal static long PyBytes_Size(IntPtr op)
15531558

15541559
private static IntPtr _PyBytes_Size(IntPtr op) => Delegates._PyBytes_Size(op);
15551560

1556-
1557-
internal static IntPtr PyUnicode_FromStringAndSize(IntPtr value, long size)
1558-
{
1559-
return PyUnicode_FromStringAndSize(value, new IntPtr(size));
1560-
}
1561-
1562-
1563-
private static IntPtr PyUnicode_FromStringAndSize(IntPtr value, IntPtr size) => Delegates.PyUnicode_FromStringAndSize(value, size);
1564-
1565-
15661561
internal static IntPtr PyUnicode_AsUTF8(IntPtr unicode) => Delegates.PyUnicode_AsUTF8(unicode);
15671562

15681563
internal static bool PyUnicode_Check(IntPtr ob)
@@ -1576,22 +1571,6 @@ internal static bool PyUnicode_Check(IntPtr ob)
15761571

15771572
internal static IntPtr PyUnicode_FromEncodedObject(IntPtr ob, IntPtr enc, IntPtr err) => Delegates.PyUnicode_FromEncodedObject(ob, enc, err);
15781573

1579-
internal static IntPtr PyUnicode_FromKindAndData(int kind, IntPtr s, long size)
1580-
{
1581-
return PyUnicode_FromKindAndData(kind, s, new IntPtr(size));
1582-
}
1583-
1584-
1585-
private static IntPtr PyUnicode_FromKindAndData(int kind, IntPtr s, IntPtr size)
1586-
=> Delegates.PyUnicode_FromKindAndData(kind, s, size);
1587-
1588-
internal static IntPtr PyUnicode_FromUnicode(string s, long size)
1589-
{
1590-
fixed(char* ptr = s)
1591-
return PyUnicode_FromKindAndData(2, (IntPtr)ptr, size);
1592-
}
1593-
1594-
15951574
internal static int PyUnicode_GetMax() => Delegates.PyUnicode_GetMax();
15961575

15971576
internal static long PyUnicode_GetSize(IntPtr ob)
@@ -1610,12 +1589,6 @@ internal static long PyUnicode_GetSize(IntPtr ob)
16101589

16111590
internal static IntPtr PyUnicode_FromOrdinal(int c) => Delegates.PyUnicode_FromOrdinal(c);
16121591

1613-
internal static IntPtr PyUnicode_FromString(string s)
1614-
{
1615-
return PyUnicode_FromUnicode(s, s.Length);
1616-
}
1617-
1618-
16191592
internal static IntPtr PyUnicode_InternFromString(string s)
16201593
{
16211594
using var ptr = new StrPtr(s, Encoding.UTF8);
@@ -1646,11 +1619,12 @@ internal static string GetManagedString(IntPtr op)
16461619
if (type == PyUnicodeType)
16471620
{
16481621
using var p = PyUnicode_AsUTF16String(new BorrowedReference(op));
1649-
int length = (int)PyUnicode_GetSize(op);
1650-
char* codePoints = (char*)PyBytes_AsString(p.DangerousGetAddress());
1622+
var bytesPtr = p.DangerousGetAddress();
1623+
int bytesLength = (int)Runtime.PyBytes_Size(bytesPtr);
1624+
char* codePoints = (char*)PyBytes_AsString(bytesPtr);
16511625
return new string(codePoints,
16521626
startIndex: 1, // skip BOM
1653-
length: length);
1627+
length: bytesLength/2-1); // utf16 - BOM
16541628
}
16551629

16561630
return null;
@@ -2442,11 +2416,10 @@ static Delegates()
24422416
PyBytes_AsString = (delegate* unmanaged[Cdecl]<BorrowedReference, IntPtr>)GetFunctionByName(nameof(PyBytes_AsString), GetUnmanagedDll(_PythonDll));
24432417
PyBytes_FromString = (delegate* unmanaged[Cdecl]<IntPtr, IntPtr>)GetFunctionByName(nameof(PyBytes_FromString), GetUnmanagedDll(_PythonDll));
24442418
_PyBytes_Size = (delegate* unmanaged[Cdecl]<IntPtr, IntPtr>)GetFunctionByName("PyBytes_Size", GetUnmanagedDll(_PythonDll));
2445-
PyUnicode_FromStringAndSize = (delegate* unmanaged[Cdecl]<IntPtr, IntPtr, IntPtr>)GetFunctionByName(nameof(PyUnicode_FromStringAndSize), GetUnmanagedDll(_PythonDll));
24462419
PyUnicode_AsUTF8 = (delegate* unmanaged[Cdecl]<IntPtr, IntPtr>)GetFunctionByName(nameof(PyUnicode_AsUTF8), GetUnmanagedDll(_PythonDll));
24472420
PyUnicode_FromObject = (delegate* unmanaged[Cdecl]<IntPtr, IntPtr>)GetFunctionByName(nameof(PyUnicode_FromObject), GetUnmanagedDll(_PythonDll));
2421+
PyUnicode_DecodeUTF16 = (delegate* unmanaged[Cdecl]<IntPtr, nint, IntPtr, IntPtr, NewReference>)GetFunctionByName(nameof(PyUnicode_DecodeUTF16), GetUnmanagedDll(_PythonDll));
24482422
PyUnicode_FromEncodedObject = (delegate* unmanaged[Cdecl]<IntPtr, IntPtr, IntPtr, IntPtr>)GetFunctionByName(nameof(PyUnicode_FromEncodedObject), GetUnmanagedDll(_PythonDll));
2449-
PyUnicode_FromKindAndData = (delegate* unmanaged[Cdecl]<int, IntPtr, IntPtr, IntPtr>)GetFunctionByName(nameof(PyUnicode_FromKindAndData), GetUnmanagedDll(_PythonDll));
24502423
PyUnicode_GetMax = (delegate* unmanaged[Cdecl]<int>)GetFunctionByName(nameof(PyUnicode_GetMax), GetUnmanagedDll(_PythonDll));
24512424
_PyUnicode_GetSize = (delegate* unmanaged[Cdecl]<IntPtr, IntPtr>)GetFunctionByName("PyUnicode_GetSize", GetUnmanagedDll(_PythonDll));
24522425
PyUnicode_AsUnicode = (delegate* unmanaged[Cdecl]<IntPtr, IntPtr>)GetFunctionByName(nameof(PyUnicode_AsUnicode), GetUnmanagedDll(_PythonDll));
@@ -2738,11 +2711,10 @@ static Delegates()
27382711
internal static delegate* unmanaged[Cdecl]<BorrowedReference, IntPtr> PyBytes_AsString { get; }
27392712
internal static delegate* unmanaged[Cdecl]<IntPtr, IntPtr> PyBytes_FromString { get; }
27402713
internal static delegate* unmanaged[Cdecl]<IntPtr, IntPtr> _PyBytes_Size { get; }
2741-
internal static delegate* unmanaged[Cdecl]<IntPtr, IntPtr, IntPtr> PyUnicode_FromStringAndSize { get; }
27422714
internal static delegate* unmanaged[Cdecl]<IntPtr, IntPtr> PyUnicode_AsUTF8 { get; }
27432715
internal static delegate* unmanaged[Cdecl]<IntPtr, IntPtr> PyUnicode_FromObject { get; }
27442716
internal static delegate* unmanaged[Cdecl]<IntPtr, IntPtr, IntPtr, IntPtr> PyUnicode_FromEncodedObject { get; }
2745-
internal static delegate* unmanaged[Cdecl]<int, IntPtr, IntPtr, IntPtr> PyUnicode_FromKindAndData { get; }
2717+
internal static delegate* unmanaged[Cdecl]<IntPtr, nint, IntPtr, IntPtr, NewReference> PyUnicode_DecodeUTF16 { get; }
27462718
internal static delegate* unmanaged[Cdecl]<int> PyUnicode_GetMax { get; }
27472719
internal static delegate* unmanaged[Cdecl]<IntPtr, IntPtr> _PyUnicode_GetSize { get; }
27482720
internal static delegate* unmanaged[Cdecl]<IntPtr, IntPtr> PyUnicode_AsUnicode { get; }

src/runtime/typemanager.cs

+1-1
Original file line number | Diff line number | Diff line change
@@ -580,7 +580,7 @@ internal static IntPtr AllocateTypeObject(string name, IntPtr metatype)
580580
// Cheat a little: we'll set tp_name to the internal char * of
581581
// the Python version of the type name - otherwise we'd have to
582582
// allocate the tp_name and would have no way to free it.
583-
IntPtr temp = Runtime.PyUnicode_FromString(name);
583+
IntPtr temp = Runtime.PyString_FromString(name);
584584
IntPtr raw = Runtime.PyUnicode_AsUTF8(temp);
585585
Marshal.WriteIntPtr(type, TypeOffset.tp_name, raw);
586586
Marshal.WriteIntPtr(type, TypeOffset.name, temp);

0 commit comments

Comments
 (0)