From e6f1f60b1cb69aa23a6504d872b3486a6ff265bb Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Fri, 6 Sep 2024 17:37:08 +0100 Subject: [PATCH] handle invalid accept-charset in requests - default to utf-8 (#584) * add tests for accept-charset * default to utf-8 if charset is invalid scalafmt * xml tests (1 broken still) * add charsetWithUtf8Failover function * Update MarshallingSpec.scala * scala 2.12 compile issue --- .../http/scaladsl/model/HttpCharset.scala | 13 ++++++ .../marshalling/MarshallingSpec.scala | 17 +++++++ .../MarshallingDirectivesSpec.scala | 44 +++++++++++++++++++ .../PredefinedToEntityMarshallers.scala | 10 ++++- 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/http-core/src/main/scala/org/apache/pekko/http/scaladsl/model/HttpCharset.scala b/http-core/src/main/scala/org/apache/pekko/http/scaladsl/model/HttpCharset.scala index 46967e1ed..c74ec3384 100644 --- a/http-core/src/main/scala/org/apache/pekko/http/scaladsl/model/HttpCharset.scala +++ b/http-core/src/main/scala/org/apache/pekko/http/scaladsl/model/HttpCharset.scala @@ -66,6 +66,19 @@ final case class HttpCharset private[http] (override val value: String)(val alia /** Returns the Charset for this charset if available or throws an exception otherwise */ def nioCharset: Charset = _nioCharset.get + /** + * @return this HttpCharset instance if this charset can be parsed to a + * java.nio.charset.Charset instance, otherwise returns the UTF-8 charset. + * @since 1.1.0 + */ + def charsetWithUtf8Failover: HttpCharset = { + if (_nioCharset.isSuccess) { + this + } else { + HttpCharsets.`UTF-8` + } + } + private def readObject(in: java.io.ObjectInputStream): Unit = { in.defaultReadObject() _nioCharset = HttpCharset.findNioCharset(value) diff --git a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala index fe0cb7446..952b581b3 100644 --- a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala +++ b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala @@ -56,6 +56,23 @@ class MarshallingSpec extends AnyFreeSpec with Matchers with BeforeAndAfterAll w } } + "The PredefinedToEntityMarshallers" - { + "StringMarshaller should marshal response to `text/plain` content in UTF-8 when accept-charset is invalid" in { + val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid"))) + val request = HttpRequest().withHeaders(invalidAcceptCharsetHeader) + val responseEntity = marshalToResponse("Ha“llo", request).entity + responseEntity.contentType.charsetOption shouldEqual Some(HttpCharsets.`UTF-8`) + responseEntity.contentType.mediaType shouldEqual MediaTypes.`text/plain` + } + "CharArrayMarshaller should marshal response to `text/plain` content in UTF-8 when accept-charset is invalid" in { + val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid"))) + val request = HttpRequest().withHeaders(invalidAcceptCharsetHeader) + val responseEntity = marshalToResponse("Ha“llo".toCharArray(), request).entity + responseEntity.contentType.charsetOption shouldEqual Some(HttpCharsets.`UTF-8`) + responseEntity.contentType.mediaType shouldEqual MediaTypes.`text/plain` + } + } + "The PredefinedToResponseMarshallers" - { "fromStatusCode should properly marshal a status code that doesn't allow an entity" in { marshalToResponse(StatusCodes.NoContent) shouldEqual HttpResponse(StatusCodes.NoContent, diff --git a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala index 641377876..a2b2ba02c 100644 --- a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala +++ b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala @@ -250,6 +250,11 @@ class MarshallingDirectivesSpec extends RoutingSpec with Inside { rejection shouldEqual UnacceptedResponseContentTypeRejection(Set(ContentType(`application/json`))) } } + "reject JSON rendering if an `Accept-Charset` request header requests an unknown encoding" in { + Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check { + rejection shouldEqual UnacceptedResponseContentTypeRejection(Set(ContentType(`application/json`))) + } + } val acceptHeaderUtf = Accept.parseFromValueString("application/json;charset=utf8").right.get val acceptHeaderNonUtf = Accept.parseFromValueString("application/json;charset=ISO-8859-1").right.get "render JSON response when `Accept` header is present with the `charset` parameter ignoring it" in { @@ -275,4 +280,43 @@ class MarshallingDirectivesSpec extends RoutingSpec with Inside { } } } + + "The marshalling infrastructure for text" should { + val foo = "Hällö" + "render text with UTF-8 encoding if no `Accept-Charset` request header is present" in { + Get() ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `UTF-8`), foo) + } + } + "render text with requested encoding if an `Accept-Charset` request header requests a non-UTF-8 encoding" in { + Get() ~> `Accept-Charset`(`ISO-8859-1`) ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `ISO-8859-1`), foo) + } + } + "render text with UTF-8 encoding if an `Accept-Charset` request header requests an unknown encoding" in { + Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `UTF-8`), foo) + } + } + } + + "The marshalling infrastructure for text/xml" should { + val foo = Hällö + "render XML with UTF-8 encoding if no `Accept-Charset` request header is present" in { + Get() ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `UTF-8`), foo.toString) + } + } + "render XML with requested encoding if an `Accept-Charset` request header requests a non-UTF-8 encoding" in { + Get() ~> `Accept-Charset`(`ISO-8859-1`) ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `ISO-8859-1`), foo.toString) + } + } + "render XML with UTF-8 encoding if an `Accept-Charset` request header requests an unknown encoding" ignore { + // still returns Content-Type: text/xml; charset=unknown + Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `UTF-8`), foo.toString) + } + } + } } diff --git a/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala b/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala index c4064b434..0a5ee275d 100644 --- a/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala +++ b/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala @@ -34,7 +34,9 @@ trait PredefinedToEntityMarshallers extends MultipartMarshallers { implicit val CharArrayMarshaller: ToEntityMarshaller[Array[Char]] = charArrayMarshaller(`text/plain`) def charArrayMarshaller(mediaType: MediaType.WithOpenCharset): ToEntityMarshaller[Array[Char]] = Marshaller.withOpenCharset(mediaType) { (value, charset) => - marshalCharArray(value, mediaType.withCharset(charset)) + // https://github.com/apache/pekko-http/issues/300 + // ignore issues with invalid charset - use UTF-8 instead + marshalCharArray(value, mediaType.withCharset(charset.charsetWithUtf8Failover)) } def charArrayMarshaller(mediaType: MediaType.WithFixedCharset): ToEntityMarshaller[Array[Char]] = Marshaller.withFixedContentType(mediaType) { value => marshalCharArray(value, mediaType) } @@ -55,7 +57,11 @@ trait PredefinedToEntityMarshallers extends MultipartMarshallers { implicit val StringMarshaller: ToEntityMarshaller[String] = stringMarshaller(`text/plain`) def stringMarshaller(mediaType: MediaType.WithOpenCharset): ToEntityMarshaller[String] = - Marshaller.withOpenCharset(mediaType) { (s, cs) => HttpEntity(mediaType.withCharset(cs), s) } + Marshaller.withOpenCharset(mediaType) { (s, cs) => + // https://github.com/apache/pekko-http/issues/300 + // ignore issues with invalid charset - use UTF-8 instead + HttpEntity(mediaType.withCharset(cs.charsetWithUtf8Failover), s) + } def stringMarshaller(mediaType: MediaType.WithFixedCharset): ToEntityMarshaller[String] = Marshaller.withFixedContentType(mediaType) { s => HttpEntity(mediaType, s) }