SF patch #455966: Allow leading 0 in float/imag literals.

tim-one · tim-one · commit d507dab91f97 · 2001-08-30T20:51:59.000Z
The consequences for Jython are still unknown (the question has been raised on Jython-Dev).
diff --git a/Doc/ref/ref2.tex b/Doc/ref/ref2.tex
@@ -517,26 +517,26 @@ \subsection{Floating point literals\label{floating}}
   \production{pointfloat}
              {[\token{intpart}] \token{fraction} | \token{intpart} "."}
   \production{exponentfloat}
-             {(\token{nonzerodigit} \token{digit}* | \token{pointfloat})
+             {(\token{intpart} | \token{pointfloat})
               \token{exponent}}
   \production{intpart}
-             {\token{nonzerodigit} \token{digit}* | "0"}
+             {\token{digit}+}
   \production{fraction}
              {"." \token{digit}+}
   \production{exponent}
              {("e" | "E") ["+" | "-"] \token{digit}+}
 \end{productionlist}
 
-Note that the integer part of a floating point number cannot look like
-an octal integer, though the exponent may look like an octal literal
-but will always be interpreted using radix 10.  For example,
-\samp{1e010} is legal, while \samp{07.1} is a syntax error.
+Note that the integer and exponent parts of floating point numbers
+can look like octal integers, but are interpreted using radix 10.  For
+example, \samp{077e010} is legal, and denotes the same number
+as \samp{77e10}.
 The allowed range of floating point literals is
 implementation-dependent.
 Some examples of floating point literals:
 
 \begin{verbatim}
-3.14    10.    .001    1e100    3.14e-10
+3.14    10.    .001    1e100    3.14e-10    0e0
 \end{verbatim}
 
 Note that numeric literals do not include a sign; a phrase like
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
@@ -65,3 +65,47 @@ def expect_error(s):
 expect_error("2.0e+")
 expect_error("1e-")
 expect_error("3-4e/21")
+
+
+if verbose:
+    print "testing literals with leading zeroes"
+
+def expect_same(test_source, expected):
+    got = eval(test_source)
+    if got != expected:
+        raise TestFailed("eval(%r) gave %r, but expected %r" %
+                         (test_source, got, expected))
+
+expect_error("077787")
+expect_error("0xj")
+expect_error("0x.")
+expect_error("0e")
+expect_same("0777", 511)
+expect_same("0777L", 511)
+expect_same("000777", 511)
+expect_same("0xff", 255)
+expect_same("0xffL", 255)
+expect_same("0XfF", 255)
+expect_same("0777.", 777)
+expect_same("0777.0", 777)
+expect_same("000000000000000000000000000000000000000000000000000777e0", 777)
+expect_same("0777e1", 7770)
+expect_same("0e0", 0)
+expect_same("0000E-012", 0)
+expect_same("09.5", 9.5)
+expect_same("0777j", 777j)
+expect_same("00j", 0j)
+expect_same("00.0", 0)
+expect_same("0e3", 0)
+expect_same("090000000000000.", 90000000000000.)
+expect_same("090000000000000.0000000000000000000000", 90000000000000.)
+expect_same("090000000000000e0", 90000000000000.)
+expect_same("090000000000000e-0", 90000000000000.)
+expect_same("090000000000000j", 90000000000000j)
+expect_error("090000000000000")  # plain octal literal w/ decimal digit
+expect_error("080000000000000")  # plain octal literal w/ decimal digit
+expect_error("000000000000009")  # plain octal literal w/ decimal digit
+expect_error("000000000000008")  # plain octal literal w/ decimal digit
+expect_same("000000000000007", 7)
+expect_same("000000000000008.", 8.)
+expect_same("000000000000009.", 9.)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
@@ -56,9 +56,9 @@ def maybe(*choices): return apply(group, choices) + '?'
 Intnumber = group(Hexnumber, Octnumber, Decnumber)
 Exponent = r'[eE][-+]?\d+'
 Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
-Expfloat = r'[1-9]\d*' + Exponent
+Expfloat = r'\d+' + Exponent
 Floatnumber = group(Pointfloat, Expfloat)
-Imagnumber = group(r'0[jJ]', r'[1-9]\d*[jJ]', Floatnumber + r'[jJ]')
+Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
 Number = group(Imagnumber, Floatnumber, Intnumber)
 
 # Tail end of ' string.
diff --git a/Misc/NEWS b/Misc/NEWS
@@ -3,6 +3,12 @@ What's New in Python 2.2a3?
 
 Core
 
++ The syntax of floating-point and imaginary literals has been
+  liberalized, to allow leading zeroes.  Examples of literals now
+  legal that were SyntaxErrors before:
+
+      00.0    0e3   0100j   07.5   00000000000000000008.
+
 + An old tokenizer bug allowed floating point literals with an incomplete
   exponent, such as 1e and 3.1e-.  Such literals now raise SyntaxError.
 
@@ -27,13 +33,13 @@ API
   module:
 
     - rename Py_TPFLAGS_GC to PyTPFLAGS_HAVE_GC
-    
+
     - use PyObject_GC_New or PyObject_GC_NewVar to allocate objects and
       PyObject_GC_Del to deallocate them
-      
+
     - rename PyObject_GC_Init to PyObject_GC_Track and PyObject_GC_Fini
       to PyObject_GC_UnTrack
-  
+
     - remove PyGC_HEAD_SIZE from object size calculations
 
     - remove calls to PyObject_AS_GC and PyObject_FROM_GC
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
@@ -722,7 +722,7 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start,
 	/* Number */
 	if (isdigit(c)) {
 		if (c == '0') {
-			/* Hex or octal */
+			/* Hex or octal -- maybe; a leading 0 may also start a decimal float or imaginary literal. */
 			c = tok_nextc(tok);
 			if (c == '.')
 				goto fraction;
@@ -737,13 +737,31 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start,
 				} while (isxdigit(c));
 			}
 			else {
-				/* XXX This is broken!  E.g.,
-				   09.9 should be accepted as float! */
+				int found_decimal = 0;
 				/* Octal; c is first char of it */
 				/* There's no 'isoctdigit' macro, sigh */
 				while ('0' <= c && c < '8') {
 					c = tok_nextc(tok);
 				}
+				if (isdigit(c)) {
+					found_decimal = 1;
+					do {
+						c = tok_nextc(tok);
+					} while (isdigit(c));
+				}
+				if (c == '.')
+					goto fraction;
+				else if (c == 'e' || c == 'E')
+					goto exponent;
+#ifndef WITHOUT_COMPLEX
+				else if (c == 'j' || c == 'J')
+					goto imaginary;
+#endif
+				else if (found_decimal) {
+					tok->done = E_TOKEN;
+					tok_backup(tok, c);
+					return ERRORTOKEN;
+				}
 			}
 			if (c == 'l' || c == 'L')
 				c = tok_nextc(tok);
@@ -765,6 +783,7 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start,
 					} while (isdigit(c));
 				}
 				if (c == 'e' || c == 'E') {
+		exponent:
 					/* Exponent part */
 					c = tok_nextc(tok);
 					if (c == '+' || c == '-')