4242
4343ENCODER (gb2312 )
4444{
45- while (inleft > 0 ) {
46- Py_UCS4 c = IN1 ;
45+ while (* inpos < inlen ) {
46+ Py_UCS4 c = INCHAR1 ;
4747 DBCHAR code ;
4848
4949 if (c < 0x80 ) {
50- WRITE1 ((unsigned char )c )
51- NEXT (1 , 1 )
50+ WRITEBYTE1 ((unsigned char )c )
51+ NEXT (1 , 1 );
5252 continue ;
5353 }
54- UCS4INVALID (c )
54+
55+ if (c > 0xFFFF )
56+ return 1 ;
5557
5658 REQUIRE_OUTBUF (2 )
5759 TRYMAP_ENC (gbcommon , code , c );
@@ -60,9 +62,9 @@ ENCODER(gb2312)
6062 if (code & 0x8000 ) /* MSB set: GBK */
6163 return 1 ;
6264
63- OUT1 ((code >> 8 ) | 0x80 )
64- OUT2 ((code & 0xFF ) | 0x80 )
65- NEXT (1 , 2 )
65+ OUTBYTE1 ((code >> 8 ) | 0x80 )
66+ OUTBYTE2 ((code & 0xFF ) | 0x80 )
67+ NEXT (1 , 2 );
6668 }
6769
6870 return 0 ;
@@ -80,7 +82,7 @@ DECODER(gb2312)
8082 }
8183
8284 REQUIRE_INBUF (2 )
83- TRYMAP_DEC (gb2312 , writer , c ^ 0x80 , IN2 ^ 0x80 ) {
85+ TRYMAP_DEC (gb2312 , writer , c ^ 0x80 , INBYTE2 ^ 0x80 ) {
8486 NEXT_IN (2 );
8587 }
8688 else return 1 ;
@@ -96,28 +98,30 @@ DECODER(gb2312)
9698
9799ENCODER (gbk )
98100{
99- while (inleft > 0 ) {
100- Py_UCS4 c = IN1 ;
101+ while (* inpos < inlen ) {
102+ Py_UCS4 c = INCHAR1 ;
101103 DBCHAR code ;
102104
103105 if (c < 0x80 ) {
104- WRITE1 ((unsigned char )c )
105- NEXT (1 , 1 )
106+ WRITEBYTE1 ((unsigned char )c )
107+ NEXT (1 , 1 );
106108 continue ;
107109 }
108- UCS4INVALID (c )
110+
111+ if (c > 0xFFFF )
112+ return 1 ;
109113
110114 REQUIRE_OUTBUF (2 )
111115
112116 GBK_ENCODE (c , code )
113117 else return 1 ;
114118
115- OUT1 ((code >> 8 ) | 0x80 )
119+ OUTBYTE1 ((code >> 8 ) | 0x80 )
116120 if (code & 0x8000 )
117- OUT2 ((code & 0xFF )) /* MSB set: GBK */
121+ OUTBYTE2 ((code & 0xFF )) /* MSB set: GBK */
118122 else
119- OUT2 ((code & 0xFF ) | 0x80 ) /* MSB unset: GB2312 */
120- NEXT (1 , 2 )
123+ OUTBYTE2 ((code & 0xFF ) | 0x80 ) /* MSB unset: GB2312 */
124+ NEXT (1 , 2 );
121125 }
122126
123127 return 0 ;
@@ -126,7 +130,7 @@ ENCODER(gbk)
126130DECODER (gbk )
127131{
128132 while (inleft > 0 ) {
129- unsigned char c = IN1 ;
133+ unsigned char c = INBYTE1 ;
130134
131135 if (c < 0x80 ) {
132136 OUTCHAR (c );
@@ -136,7 +140,7 @@ DECODER(gbk)
136140
137141 REQUIRE_INBUF (2 )
138142
139- GBK_DECODE (c , IN2 , writer )
143+ GBK_DECODE (c , INBYTE2 , writer )
140144 else return 1 ;
141145
142146 NEXT_IN (2 );
@@ -152,41 +156,31 @@ DECODER(gbk)
152156
153157ENCODER (gb18030 )
154158{
155- while (inleft > 0 ) {
156- Py_UCS4 c = IN1 ;
159+ while (* inpos < inlen ) {
160+ Py_UCS4 c = INCHAR1 ;
157161 DBCHAR code ;
158162
159163 if (c < 0x80 ) {
160- WRITE1 (c )
161- NEXT (1 , 1 )
164+ WRITEBYTE1 (c )
165+ NEXT (1 , 1 );
162166 continue ;
163167 }
164168
165- DECODE_SURROGATE (c )
166- if (c > 0x10FFFF )
167- #if Py_UNICODE_SIZE == 2
168- return 2 ; /* surrogates pair */
169- #else
170- return 1 ;
171- #endif
172- else if (c >= 0x10000 ) {
169+ if (c >= 0x10000 ) {
173170 Py_UCS4 tc = c - 0x10000 ;
171+ assert (c <= 0x10FFFF );
174172
175173 REQUIRE_OUTBUF (4 )
176174
177- OUT4 ((unsigned char )(tc % 10 ) + 0x30 )
175+ OUTBYTE4 ((unsigned char )(tc % 10 ) + 0x30 )
178176 tc /= 10 ;
179- OUT3 ((unsigned char )(tc % 126 ) + 0x81 )
177+ OUTBYTE3 ((unsigned char )(tc % 126 ) + 0x81 )
180178 tc /= 126 ;
181- OUT2 ((unsigned char )(tc % 10 ) + 0x30 )
179+ OUTBYTE2 ((unsigned char )(tc % 10 ) + 0x30 )
182180 tc /= 10 ;
183- OUT1 ((unsigned char )(tc + 0x90 ))
181+ OUTBYTE1 ((unsigned char )(tc + 0x90 ))
184182
185- #if Py_UNICODE_SIZE == 2
186- NEXT (2 , 4 ) /* surrogates pair */
187- #else
188- NEXT (1 , 4 )
189- #endif
183+ NEXT (1 , 4 );
190184 continue ;
191185 }
192186
@@ -209,15 +203,15 @@ ENCODER(gb18030)
209203 tc = c - utrrange -> first +
210204 utrrange -> base ;
211205
212- OUT4 ((unsigned char )(tc % 10 ) + 0x30 )
206+ OUTBYTE4 ((unsigned char )(tc % 10 ) + 0x30 )
213207 tc /= 10 ;
214- OUT3 ((unsigned char )(tc % 126 ) + 0x81 )
208+ OUTBYTE3 ((unsigned char )(tc % 126 ) + 0x81 )
215209 tc /= 126 ;
216- OUT2 ((unsigned char )(tc % 10 ) + 0x30 )
210+ OUTBYTE2 ((unsigned char )(tc % 10 ) + 0x30 )
217211 tc /= 10 ;
218- OUT1 ((unsigned char )tc + 0x81 )
212+ OUTBYTE1 ((unsigned char )tc + 0x81 )
219213
220- NEXT (1 , 4 )
214+ NEXT (1 , 4 );
221215 break ;
222216 }
223217
@@ -226,13 +220,13 @@ ENCODER(gb18030)
226220 continue ;
227221 }
228222
229- OUT1 ((code >> 8 ) | 0x80 )
223+ OUTBYTE1 ((code >> 8 ) | 0x80 )
230224 if (code & 0x8000 )
231- OUT2 ((code & 0xFF )) /* MSB set: GBK or GB18030ext */
225+ OUTBYTE2 ((code & 0xFF )) /* MSB set: GBK or GB18030ext */
232226 else
233- OUT2 ((code & 0xFF ) | 0x80 ) /* MSB unset: GB2312 */
227+ OUTBYTE2 ((code & 0xFF ) | 0x80 ) /* MSB unset: GB2312 */
234228
235- NEXT (1 , 2 )
229+ NEXT (1 , 2 );
236230 }
237231
238232 return 0 ;
@@ -241,7 +235,7 @@ ENCODER(gb18030)
241235DECODER (gb18030 )
242236{
243237 while (inleft > 0 ) {
244- unsigned char c = IN1 , c2 ;
238+ unsigned char c = INBYTE1 , c2 ;
245239
246240 if (c < 0x80 ) {
247241 OUTCHAR (c );
@@ -251,15 +245,15 @@ DECODER(gb18030)
251245
252246 REQUIRE_INBUF (2 )
253247
254- c2 = IN2 ;
248+ c2 = INBYTE2 ;
255249 if (c2 >= 0x30 && c2 <= 0x39 ) { /* 4 bytes seq */
256250 const struct _gb18030_to_unibmp_ranges * utr ;
257251 unsigned char c3 , c4 ;
258252 Py_UCS4 lseq ;
259253
260254 REQUIRE_INBUF (4 )
261- c3 = IN3 ;
262- c4 = IN4 ;
255+ c3 = INBYTE3 ;
256+ c4 = INBYTE4 ;
263257 if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39 )
264258 return 1 ;
265259 c -= 0x81 ; c2 -= 0x30 ;
@@ -313,33 +307,34 @@ ENCODER_INIT(hz)
313307ENCODER_RESET (hz )
314308{
315309 if (state -> i != 0 ) {
316- WRITE2 ('~' , '}' )
310+ WRITEBYTE2 ('~' , '}' )
317311 state -> i = 0 ;
318- NEXT_OUT (2 )
312+ NEXT_OUT (2 );
319313 }
320314 return 0 ;
321315}
322316
323317ENCODER (hz )
324318{
325- while (inleft > 0 ) {
326- Py_UCS4 c = IN1 ;
319+ while (* inpos < inlen ) {
320+ Py_UCS4 c = INCHAR1 ;
327321 DBCHAR code ;
328322
329323 if (c < 0x80 ) {
330324 if (state -> i == 0 ) {
331- WRITE1 ((unsigned char )c )
332- NEXT (1 , 1 )
325+ WRITEBYTE1 ((unsigned char )c )
326+ NEXT (1 , 1 );
333327 }
334328 else {
335- WRITE3 ('~' , '}' , (unsigned char )c )
336- NEXT (1 , 3 )
329+ WRITEBYTE3 ('~' , '}' , (unsigned char )c )
330+ NEXT (1 , 3 );
337331 state -> i = 0 ;
338332 }
339333 continue ;
340334 }
341335
342- UCS4INVALID (c )
336+ if (c > 0xFFFF )
337+ return 1 ;
343338
344339 TRYMAP_ENC (gbcommon , code , c );
345340 else return 1 ;
@@ -348,13 +343,13 @@ ENCODER(hz)
348343 return 1 ;
349344
350345 if (state -> i == 0 ) {
351- WRITE4 ('~' , '{' , code >> 8 , code & 0xff )
352- NEXT (1 , 4 )
346+ WRITEBYTE4 ('~' , '{' , code >> 8 , code & 0xff )
347+ NEXT (1 , 4 );
353348 state -> i = 1 ;
354349 }
355350 else {
356- WRITE2 (code >> 8 , code & 0xff )
357- NEXT (1 , 2 )
351+ WRITEBYTE2 (code >> 8 , code & 0xff )
352+ NEXT (1 , 2 );
358353 }
359354 }
360355
@@ -376,10 +371,10 @@ DECODER_RESET(hz)
376371DECODER (hz )
377372{
378373 while (inleft > 0 ) {
379- unsigned char c = IN1 ;
374+ unsigned char c = INBYTE1 ;
380375
381376 if (c == '~' ) {
382- unsigned char c2 = IN2 ;
377+ unsigned char c2 = INBYTE2 ;
383378
384379 REQUIRE_INBUF (2 )
385380 if (c2 == '~' ) {
@@ -408,7 +403,7 @@ DECODER(hz)
408403 }
409404 else { /* GB mode */
410405 REQUIRE_INBUF (2 )
411- TRYMAP_DEC (gb2312 , writer , c , IN2 ) {
406+ TRYMAP_DEC (gb2312 , writer , c , INBYTE2 ) {
412407 NEXT_IN (2 );
413408 }
414409 else
0 commit comments