From e22eb9853da47b6a60bf1e9b58526ca0618bcfce Mon Sep 17 00:00:00 2001 From: Maxim Schuwalow Date: Fri, 17 Jul 2020 19:50:42 +0200 Subject: [PATCH 1/7] fixes for ZTransducer#utfXX --- .../scala/zio/stream/ZTransducerSpec.scala | 36 ++++++++++++++-- .../main/scala/zio/stream/ZTransducer.scala | 42 +++++++++++++++---- 2 files changed, 67 insertions(+), 11 deletions(-) diff --git a/streams-tests/shared/src/test/scala/zio/stream/ZTransducerSpec.scala b/streams-tests/shared/src/test/scala/zio/stream/ZTransducerSpec.scala index a61f50c3d4c3..a82af5287699 100644 --- a/streams-tests/shared/src/test/scala/zio/stream/ZTransducerSpec.scala +++ b/streams-tests/shared/src/test/scala/zio/stream/ZTransducerSpec.scala @@ -480,6 +480,16 @@ object ZTransducerSpec extends ZIOBaseSpec { equalTo(new String(Array(0xF0.toByte, 0x90.toByte), "UTF-8")) ) } + }, + testM("handle byte order mark") { + checkM(Gen.anyString) { s => + ZTransducer.utf8Decode.push.use { push => + for { + part1 <- push(Some(Chunk[Byte](-17, -69, -65) ++ Chunk.fromArray(s.getBytes("UTF-8")))) + part2 <- push(None) + } yield assert((part1 ++ part2).mkString)(equalTo(s)) + } + } } ), suite("iso_8859_1")( @@ -566,6 +576,25 @@ object ZTransducerSpec extends ZIOBaseSpec { } assertM(test.run)(succeeds(equalTo(0))) } + }, + testM("emits data if less than n are collected") { + val gen = + for { + data <- Gen.chunkOf(Gen.anyInt) + n <- Gen.anyInt.filter(_ > data.length) + } yield (data, n) + + checkM(gen) { + case (data, n) => + val test = + ZStream + .fromChunk(data) + .transduce { + ZTransducer.branchAfter(n)(ZTransducer.prepend) + } + .runCollect + assertM(test.run)(succeeds(equalTo(data))) + } } ), suite("utf16BEDecode")( @@ -603,15 +632,14 @@ object ZTransducerSpec extends ZIOBaseSpec { } } }, - testM("no magic sequence") { + testM("no magic sequence - parse as big endian") { checkM(Gen.anyString.filter(_.nonEmpty)) { s => - val test = ZTransducer.utf16Decode.push.use { push => + ZTransducer.utf16Decode.push.use { push => for { part1 <- push(Some(Chunk.fromArray(s.getBytes(StandardCharsets.UTF_16BE)))) part2 <- push(None) - } yield (part1 ++ part2).mkString + } yield assert((part1 ++ part2).mkString)(equalTo(s)) } - assertM(test.run)(fails(anything)) } }, testM("big endian") { diff --git a/streams/shared/src/main/scala/zio/stream/ZTransducer.scala b/streams/shared/src/main/scala/zio/stream/ZTransducer.scala index b242840e9bcc..b04d586fbe7c 100644 --- a/streams/shared/src/main/scala/zio/stream/ZTransducer.scala +++ b/streams/shared/src/main/scala/zio/stream/ZTransducer.scala @@ -119,6 +119,7 @@ object ZTransducer extends ZTransducerPlatformSpecificConstructors { /** * Reads the first n values from the stream and uses them to choose the transducer that will be used for the remainder of the stream. + * If the stream ends before it has collected n values the partial chunk will be provided to f. */ def branchAfter[R, E, I, O](n: Int)(f: Chunk[I] => ZTransducer[R, E, I, O]): ZTransducer[R, E, I, O] = ZTransducer { @@ -139,8 +140,8 @@ object ZTransducer extends ZTransducerPlatformSpecificConstructors { stateRef.getAndSet(State.initial).flatMap { case State.Emitting(finalizer, push) => push(None) <* finalizer(Exit.unit) - case _ => - ZIO.succeedNow(Chunk.empty) + case State.Collecting(data) => + f(data).push.use(_(None)) } case Some(data) => stateRef.modify { @@ -641,6 +642,21 @@ object ZTransducer extends ZTransducerPlatformSpecificConstructors { def last[O]: ZTransducer[Any, Nothing, O, Option[O]] = foldLeft[O, Option[O]](Option.empty[O])((_, a) => Some(a)) + /** + * Emits the provided chunk before emitting any other value. + */ + def prepend[A](values: Chunk[A]): ZTransducer[Any, Nothing, A, A] = + ZTransducer { + ZRef.makeManaged(values).map { stateRef => + { + case None => + stateRef.getAndSet(Chunk.empty) + case Some(xs) => + stateRef.getAndSet(Chunk.empty).map(c => if (c.isEmpty) xs else c ++ xs) + } + } + } + /** * Splits strings on newlines. Handles both Windows newlines (`\r\n`) and UNIX newlines (`\n`). */ @@ -763,8 +779,8 @@ object ZTransducer extends ZTransducerPlatformSpecificConstructors { * This transducer uses the String constructor's behavior when handling malformed byte * sequences. */ - val utf8Decode: ZTransducer[Any, Nothing, Byte, String] = - ZTransducer { + val utf8Decode: ZTransducer[Any, Nothing, Byte, String] = { + val transducer = ZTransducer[Any, Nothing, Byte, String] { def is2ByteSequenceStart(b: Byte) = (b & 0xE0) == 0xC0 def is3ByteSequenceStart(b: Byte) = (b & 0xF0) == 0xE0 def is4ByteSequenceStart(b: Byte) = (b & 0xF8) == 0xF0 @@ -812,21 +828,33 @@ object ZTransducer extends ZTransducerPlatformSpecificConstructors { } } + // handle optional byte order mark + branchAfter(3) { bytes => + bytes.toList match { + case -17 :: -69 :: -65 :: Nil => + transducer + case _ => + prepend(bytes) >>> transducer + } + } + } + /** * Decodes chunks of UTF-16 bytes into strings. + * If no byte order mark is found big-endianness is assumed. * * This will fail with an `IllegalArgumentException` if no byte order mark was found and will * use the error handling behavior of the endian-specific decoder otherwise. */ - val utf16Decode: ZTransducer[Any, IllegalArgumentException, Byte, String] = + val utf16Decode: ZTransducer[Any, Nothing, Byte, String] = branchAfter(2) { bytes => bytes.toList match { case -2 :: -1 :: Nil => utf16BEDecode case -1 :: -2 :: Nil => utf16LEDecode - case xs => - fail(new IllegalArgumentException(s"Not a valid byte order mark ${xs.map(_ & 0xFF).mkString(", ")}")) + case _ => + prepend(bytes) >>> utf16BEDecode } } From 491996168d37dbc131c8eeceaf39774cc7ea2261 Mon Sep 17 00:00:00 2001 From: Maxim Schuwalow Date: Fri, 17 Jul 2020 21:23:12 +0200 Subject: [PATCH 2/7] retrigger ci From ad48326ba04db257d53aa3517f8e543826014f81 Mon Sep 17 00:00:00 2001 From: Maxim Schuwalow Date: Fri, 17 Jul 2020 23:14:23 +0200 Subject: [PATCH 3/7] update doc string --- streams/shared/src/main/scala/zio/stream/ZTransducer.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/streams/shared/src/main/scala/zio/stream/ZTransducer.scala b/streams/shared/src/main/scala/zio/stream/ZTransducer.scala index b04d586fbe7c..edec6ac43af7 100644 --- a/streams/shared/src/main/scala/zio/stream/ZTransducer.scala +++ b/streams/shared/src/main/scala/zio/stream/ZTransducer.scala @@ -843,8 +843,7 @@ object ZTransducer extends ZTransducerPlatformSpecificConstructors { * Decodes chunks of UTF-16 bytes into strings. * If no byte order mark is found big-endianness is assumed. * - * This will fail with an `IllegalArgumentException` if no byte order mark was found and will - * use the error handling behavior of the endian-specific decoder otherwise. + * It will use the error handling behavior of the endian-specific decoder when handling malformed byte sequences. */ val utf16Decode: ZTransducer[Any, Nothing, Byte, String] = branchAfter(2) { bytes => From 17fbb6cd6823038a20a4df2f7ae91610c550cb13 Mon Sep 17 00:00:00 2001 From: Maxim Schuwalow Date: Sat, 18 Jul 2020 12:33:28 +0200 Subject: [PATCH 4/7] retrigger ci From 593961fad7688f876b797ce482f2f1cd60026ee6 Mon Sep 17 00:00:00 2001 From: Maxim Schuwalow Date: Sun, 19 Jul 2020 18:08:46 +0200 Subject: [PATCH 5/7] retrigger ci From 91644a31122b27475dc1646608f643a1f62f33ed Mon Sep 17 00:00:00 2001 From: Maxim Schuwalow Date: Sun, 19 Jul 2020 18:38:19 +0200 Subject: [PATCH 6/7] retrigger ci From 63a5fcfc01e7719e0ced0d524972eeb4c6e6793d Mon Sep 17 00:00:00 2001 From: Maxim Schuwalow Date: Sun, 19 Jul 2020 21:51:57 +0200 Subject: [PATCH 7/7] retrigger ci