Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a78c795

Browse files
committed
Issue 25483: Add an opcode to make f-string formatting more robust.
1 parent 2753a09 commit a78c795

8 files changed

Lines changed: 207 additions & 170 deletions

File tree

Include/ceval.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,14 @@ PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *);
206206
PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
207207
#endif
208208

209+
/* Masks and values used by the FORMAT_VALUE opcode.

   The opcode's oparg packs two fields: the low two bits (FVC_MASK)
   select which conversion, if any, is applied to the value before it
   is formatted, and the next bit (FVS_MASK) records whether a format
   spec was supplied along with the value. */
210+
#define FVC_MASK 0x3 /* bits of oparg holding the conversion selector */
211+
#define FVC_NONE 0x0 /* no conversion requested */
212+
#define FVC_STR 0x1 /* !s conversion */
213+
#define FVC_REPR 0x2 /* !r conversion */
214+
#define FVC_ASCII 0x3 /* !a conversion */
215+
#define FVS_MASK 0x4 /* bit of oparg holding the format-spec flag */
216+
#define FVS_HAVE_SPEC 0x4 /* set when a format spec is present */
209217

210218
#ifdef __cplusplus
211219
}

Include/opcode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ extern "C" {
122122
#define BUILD_TUPLE_UNPACK 152
123123
#define BUILD_SET_UNPACK 153
124124
#define SETUP_ASYNC_WITH 154
125+
#define FORMAT_VALUE 155
125126

126127
/* EXCEPT_HANDLER is a special, implicit block type which is created when
127128
entering an except handler. It is not an opcode but we define it here

Lib/importlib/_bootstrap_external.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,12 +223,13 @@ def _write_atomic(path, data, mode=0o666):
223223
# Python 3.5b1 3330 (PEP 448: Additional Unpacking Generalizations)
224224
# Python 3.5b2 3340 (fix dictionary display evaluation order #11205)
225225
# Python 3.5b2 3350 (add GET_YIELD_FROM_ITER opcode #24400)
226+
# Python 3.6a0 3360 (add FORMAT_VALUE opcode #25483)
226227
#
227228
# MAGIC must change whenever the bytecode emitted by the compiler may no
228229
# longer be understood by older implementations of the eval loop (usually
229230
# due to the addition of new opcodes).
230231

231-
MAGIC_NUMBER = (3350).to_bytes(2, 'little') + b'\r\n'
232+
MAGIC_NUMBER = (3360).to_bytes(2, 'little') + b'\r\n'
232233
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
233234

234235
_PYCACHE = '__pycache__'

Lib/opcode.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,4 +214,6 @@ def jabs_op(name, op):
214214
def_op('BUILD_TUPLE_UNPACK', 152)
215215
def_op('BUILD_SET_UNPACK', 153)
216216

217+
def_op('FORMAT_VALUE', 155)
218+
217219
del def_op, name_op, jrel_op, jabs_op

Python/ceval.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3363,6 +3363,63 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
33633363
DISPATCH();
33643364
}
33653365

3366+
TARGET(FORMAT_VALUE) {
3367+
/* Handles f-string value formatting. */
3368+
PyObject *result;
3369+
PyObject *fmt_spec;
3370+
PyObject *value;
3371+
PyObject *(*conv_fn)(PyObject *);
3372+
int which_conversion = oparg & FVC_MASK;
3373+
int have_fmt_spec = (oparg & FVS_MASK) == FVS_HAVE_SPEC;
3374+
3375+
fmt_spec = have_fmt_spec ? POP() : NULL;
3376+
value = TOP();
3377+
3378+
/* See if any conversion is specified. */
3379+
switch (which_conversion) {
3380+
case FVC_STR: conv_fn = PyObject_Str; break;
3381+
case FVC_REPR: conv_fn = PyObject_Repr; break;
3382+
case FVC_ASCII: conv_fn = PyObject_ASCII; break;
3383+
3384+
/* Must be 0 (meaning no conversion), since only four
3385+
values are allowed by (oparg & FVC_MASK). */
3386+
default: conv_fn = NULL; break;
3387+
}
3388+
3389+
/* If there's a conversion function, call it and replace
3390+
value with that result. Otherwise, just use value,
3391+
without conversion. */
3392+
if (conv_fn) {
3393+
result = conv_fn(value);
3394+
Py_DECREF(value);
3395+
if (!result) {
3396+
Py_XDECREF(fmt_spec);
3397+
goto error;
3398+
}
3399+
value = result;
3400+
}
3401+
3402+
/* If value is a unicode object, and there's no fmt_spec,
3403+
then we know the result of format(value) is value
3404+
itself. In that case, skip calling format(). I plan to
3405+
move this optimization in to PyObject_Format()
3406+
itself. */
3407+
if (PyUnicode_CheckExact(value) && fmt_spec == NULL) {
3408+
/* Do nothing, just transfer ownership to result. */
3409+
result = value;
3410+
} else {
3411+
/* Actually call format(). */
3412+
result = PyObject_Format(value, fmt_spec);
3413+
Py_DECREF(value);
3414+
Py_XDECREF(fmt_spec);
3415+
if (!result)
3416+
goto error;
3417+
}
3418+
3419+
SET_TOP(result);
3420+
DISPATCH();
3421+
}
3422+
33663423
TARGET(EXTENDED_ARG) {
33673424
opcode = NEXTOP();
33683425
oparg = oparg<<16 | NEXTARG();

Python/compile.c

Lines changed: 33 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,10 @@ PyCompile_OpcodeStackEffect(int opcode, int oparg)
10671067
return 1;
10681068
case GET_YIELD_FROM_ITER:
10691069
return 0;
1070+
case FORMAT_VALUE:
1071+
/* If there's a fmt_spec on the stack, we go from 2->1,
1072+
else 1->1. */
1073+
return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
10701074
default:
10711075
return PY_INVALID_STACK_EFFECT;
10721076
}
@@ -3241,83 +3245,47 @@ compiler_joined_str(struct compiler *c, expr_ty e)
32413245
return 1;
32423246
}
32433247

3244-
/* Note that this code uses the builtin functions format(), str(),
3245-
repr(), and ascii(). You can break this code, or make it do odd
3246-
things, by redefining those functions. */
3248+
/* Used to implement f-strings. Format a single value. */
32473249
static int
32483250
compiler_formatted_value(struct compiler *c, expr_ty e)
32493251
{
3250-
PyObject *conversion_name = NULL;
3251-
3252-
static PyObject *format_string;
3253-
static PyObject *str_string;
3254-
static PyObject *repr_string;
3255-
static PyObject *ascii_string;
3256-
3257-
if (!format_string) {
3258-
format_string = PyUnicode_InternFromString("format");
3259-
if (!format_string)
3260-
return 0;
3261-
}
3262-
3263-
if (!str_string) {
3264-
str_string = PyUnicode_InternFromString("str");
3265-
if (!str_string)
3266-
return 0;
3267-
}
3268-
3269-
if (!repr_string) {
3270-
repr_string = PyUnicode_InternFromString("repr");
3271-
if (!repr_string)
3272-
return 0;
3273-
}
3274-
if (!ascii_string) {
3275-
ascii_string = PyUnicode_InternFromString("ascii");
3276-
if (!ascii_string)
3277-
return 0;
3278-
}
3252+
/* Our oparg encodes 2 pieces of information: the conversion
3253+
character, and whether or not a format_spec was provided.
3254+
3255+
Convert the conversion char to 2 bits:
3256+
None: 000 0x0 FVC_NONE
3257+
!s : 001 0x1 FVC_STR
3258+
!r : 010 0x2 FVC_REPR
3259+
!a : 011 0x3 FVC_ASCII
3260+
3261+
The next bit (FVS_HAVE_SPEC) records whether or not we have a format spec:
3262+
yes : 100 0x4
3263+
no : 000 0x0
3264+
*/
32793265

3280-
ADDOP_NAME(c, LOAD_GLOBAL, format_string, names);
3266+
int oparg;
32813267

3282-
/* If needed, convert via str, repr, or ascii. */
3283-
if (e->v.FormattedValue.conversion != -1) {
3284-
switch (e->v.FormattedValue.conversion) {
3285-
case 's':
3286-
conversion_name = str_string;
3287-
break;
3288-
case 'r':
3289-
conversion_name = repr_string;
3290-
break;
3291-
case 'a':
3292-
conversion_name = ascii_string;
3293-
break;
3294-
default:
3295-
PyErr_SetString(PyExc_SystemError,
3296-
"Unrecognized conversion character");
3297-
return 0;
3298-
}
3299-
ADDOP_NAME(c, LOAD_GLOBAL, conversion_name, names);
3300-
}
3301-
3302-
/* Evaluate the value. */
3268+
/* Evaluate the expression to be formatted. */
33033269
VISIT(c, expr, e->v.FormattedValue.value);
33043270

3305-
/* If needed, convert via str, repr, or ascii. */
3306-
if (conversion_name) {
3307-
/* Call the function we previously pushed. */
3308-
ADDOP_I(c, CALL_FUNCTION, 1);
3271+
switch (e->v.FormattedValue.conversion) {
3272+
case 's': oparg = FVC_STR; break;
3273+
case 'r': oparg = FVC_REPR; break;
3274+
case 'a': oparg = FVC_ASCII; break;
3275+
case -1: oparg = FVC_NONE; break;
3276+
default:
3277+
PyErr_SetString(PyExc_SystemError,
3278+
"Unrecognized conversion character");
3279+
return 0;
33093280
}
3310-
3311-
/* If we have a format spec, use format(value, format_spec). Otherwise,
3312-
use the single argument form. */
33133281
if (e->v.FormattedValue.format_spec) {
3282+
/* Evaluate the format spec, and update our opcode arg. */
33143283
VISIT(c, expr, e->v.FormattedValue.format_spec);
3315-
ADDOP_I(c, CALL_FUNCTION, 2);
3316-
} else {
3317-
/* No format spec specified, call format(value). */
3318-
ADDOP_I(c, CALL_FUNCTION, 1);
3284+
oparg |= FVS_HAVE_SPEC;
33193285
}
33203286

3287+
/* Finally, emit the FORMAT_VALUE opcode with the oparg built above. */
3288+
ADDOP_I(c, FORMAT_VALUE, oparg);
33213289
return 1;
33223290
}
33233291

0 commit comments

Comments
 (0)