From fba460a53e1a6496dce4262a7d5778f55da3e250 Mon Sep 17 00:00:00 2001 From: MOZGIII Date: Sun, 17 Dec 2023 17:47:22 -0300 Subject: [PATCH 1/9] Log raw WS messages to facilitate the debugging --- src/conn.rs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/conn.rs b/src/conn.rs index 46d04b5d..ddf53ff5 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -136,22 +136,28 @@ impl Stream for Connection { } // read from the ws - match ready!(pin.ws.poll_next_unpin(cx)) { - Some(Ok(msg)) => match serde_json::from_slice::>(&msg.into_data()) { + let msg = match ready!(pin.ws.poll_next_unpin(cx)) { + Some(Ok(msg)) => msg, + Some(Err(err)) => return Poll::Ready(Some(Err(CdpError::Ws(err)))), + None => { + // ws connection closed + return Poll::Ready(None); + } + }; + + tracing::trace!(target: "chromiumoxide::conn::raw_ws", ?msg, "Got raw WS message"); + + Poll::Ready(Some( + match serde_json::from_slice::>(&msg.into_data()) { Ok(msg) => { tracing::trace!("Received {:?}", msg); - Poll::Ready(Some(Ok(msg))) + Ok(msg) } Err(err) => { tracing::error!("Failed to deserialize WS response {}", err); - Poll::Ready(Some(Err(err.into()))) + Err(err.into()) } }, - Some(Err(err)) => Poll::Ready(Some(Err(CdpError::Ws(err)))), - None => { - // ws connection closed - Poll::Ready(None) - } - } + )) } } From fa58a28dac1fab423017f5d3fd9f011ee2d0f5a9 Mon Sep 17 00:00:00 2001 From: MOZGIII Date: Sun, 17 Dec 2023 18:39:21 -0300 Subject: [PATCH 2/9] Add debug-raw-ws-messages feature --- Cargo.toml | 2 ++ src/conn.rs | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index ff72856b..8c544c0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,8 @@ _fetcher-rusttls-tokio = ["fetcher", "chromiumoxide_fetcher/_rustls-tokio"] _fetcher-native-async-std = ["fetcher", "chromiumoxide_fetcher/_native-async-std"] _fetcher-native-tokio = ["fetcher", "chromiumoxide_fetcher/_native-tokio"] +debug-raw-ws-messages = [] + [[example]] name = "wiki-tokio" required-features = ["tokio-runtime"] diff --git a/src/conn.rs b/src/conn.rs index ddf53ff5..5b8e40b4 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -147,6 +147,9 @@ impl Stream for Connection { tracing::trace!(target: "chromiumoxide::conn::raw_ws", ?msg, "Got raw WS message"); + #[cfg(feature = "debug-raw-ws-messages")] + let msg_for_debug = msg.clone(); + Poll::Ready(Some( match serde_json::from_slice::>(&msg.into_data()) { Ok(msg) => { @@ -154,6 +157,9 @@ impl Stream for Connection { Ok(msg) } Err(err) => { + #[cfg(feature = "debug-raw-ws-messages")] + tracing::debug!(target: "chromiumoxide::conn::raw_ws::parse_errors", msg = ?msg_for_debug, "Failed to parse raw WS message"); + tracing::error!("Failed to deserialize WS response {}", err); Err(err.into()) } From c2841e2510d8f6988fcd1410c517c897ff9cca13 Mon Sep 17 00:00:00 2001 From: Ryo Hirayama Date: Wed, 14 Feb 2024 17:10:58 +0900 Subject: [PATCH 3/9] Emit an error on WsMessage::Text fails to be parsed --- Cargo.toml | 2 -- src/conn.rs | 52 ++++++++++++++++++++++++++-------------------------- src/error.rs | 3 +++ 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8c544c0d..ff72856b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,8 +69,6 @@ _fetcher-rusttls-tokio = ["fetcher", "chromiumoxide_fetcher/_rustls-tokio"] _fetcher-native-async-std = ["fetcher", "chromiumoxide_fetcher/_native-async-std"] _fetcher-native-tokio = ["fetcher", "chromiumoxide_fetcher/_native-tokio"] -debug-raw-ws-messages = [] - [[example]] name = "wiki-tokio" required-features = ["tokio-runtime"] diff --git a/src/conn.rs b/src/conn.rs index 5b8e40b4..884e617b 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -3,6 +3,7 @@ use std::marker::PhantomData; use std::pin::Pin; use std::task::ready; +use async_tungstenite::tungstenite::Message as WsMessage; use async_tungstenite::{tungstenite::protocol::WebSocketConfig, WebSocketStream}; use futures::stream::Stream; use futures::task::{Context, Poll}; @@ -132,38 +133,37 @@ impl Stream for Connection { pin.pending_flush = Some(call); } } + break; } // read from the ws - let msg = match ready!(pin.ws.poll_next_unpin(cx)) { - Some(Ok(msg)) => msg, - Some(Err(err)) => return Poll::Ready(Some(Err(CdpError::Ws(err)))), + match ready!(pin.ws.poll_next_unpin(cx)) { + Some(Ok(WsMessage::Text(text))) => { + let ready = match serde_json::from_str::>(&text) { + Ok(msg) => { + tracing::trace!("Received {:?}", msg); + Ok(msg) + } + Err(err) => { + tracing::debug!(target: "chromiumoxide::conn::raw_ws::parse_errors", msg = text, "Failed to parse raw WS message"); + tracing::error!("Failed to deserialize WS response {}", err); + Err(err.into()) + } + }; + Poll::Ready(Some(ready)) + } + Some(Ok(WsMessage::Close(_))) => Poll::Ready(None), + Some(Ok(WsMessage::Ping(_))) | Some(Ok(WsMessage::Pong(_))) => { + // ignore pings + Poll::Pending + } + Some(Ok(msg)) => Poll::Ready(Some(Err(CdpError::UnexpectedWsMessage(msg)))), + Some(Err(err)) => Poll::Ready(Some(Err(CdpError::Ws(err)))), None => { // ws connection closed - return Poll::Ready(None); + Poll::Ready(None) } - }; - - tracing::trace!(target: "chromiumoxide::conn::raw_ws", ?msg, "Got raw WS message"); - - #[cfg(feature = "debug-raw-ws-messages")] - let msg_for_debug = msg.clone(); - - Poll::Ready(Some( - match serde_json::from_slice::>(&msg.into_data()) { - Ok(msg) => { - tracing::trace!("Received {:?}", msg); - Ok(msg) - } - Err(err) => { - #[cfg(feature = "debug-raw-ws-messages")] - tracing::debug!(target: "chromiumoxide::conn::raw_ws::parse_errors", msg = ?msg_for_debug, "Failed to parse raw WS message"); - - tracing::error!("Failed to deserialize WS response {}", err); - Err(err.into()) - } - }, - )) + } } } diff --git a/src/error.rs b/src/error.rs index 99e45d4c..ce75bab4 100644 --- a/src/error.rs +++ b/src/error.rs @@ -4,6 +4,7 @@ use std::process::ExitStatus; use std::time::Instant; use async_tungstenite::tungstenite; +use async_tungstenite::tungstenite::Message; use base64::DecodeError; use futures::channel::mpsc::SendError; use futures::channel::oneshot::Canceled; @@ -28,6 +29,8 @@ pub enum CdpError { Chrome(#[from] chromiumoxide_types::Error), #[error("Received no response from the chromium instance.")] NoResponse, + #[error("Received unexpected ws message: {0:?}")] + UnexpectedWsMessage(Message), #[error("{0}")] ChannelSendError(#[from] ChannelError), #[error("Browser process exited with status {0:?} before websocket URL could be resolved, stderr: {1:?}")] From cbc4db5f8c3ec592fe461974ef21f087a8733d2a Mon Sep 17 00:00:00 2001 From: Ryo Hirayama Date: Wed, 14 Feb 2024 22:09:22 +0900 Subject: [PATCH 4/9] Wake the task before returning Poll::Pending --- src/conn.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/conn.rs b/src/conn.rs index 884e617b..72c0d9ea 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -154,8 +154,9 @@ impl Stream for Connection { Poll::Ready(Some(ready)) } Some(Ok(WsMessage::Close(_))) => Poll::Ready(None), + // ignore ping and pong Some(Ok(WsMessage::Ping(_))) | Some(Ok(WsMessage::Pong(_))) => { - // ignore pings + cx.waker().wake_by_ref(); Poll::Pending } Some(Ok(msg)) => Poll::Ready(Some(Err(CdpError::UnexpectedWsMessage(msg)))), From c3dc4b2a610236d15ca80d7cf48426a04802d7a9 Mon Sep 17 00:00:00 2001 From: Ryo Hirayama Date: Wed, 14 Feb 2024 23:32:55 +0900 Subject: [PATCH 5/9] Stop using #[serde(untagged)] for Message --- chromiumoxide_types/src/lib.rs | 29 +++++++++++++++++++++++++++-- src/conn.rs | 2 +- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/chromiumoxide_types/src/lib.rs b/chromiumoxide_types/src/lib.rs index ce6f022e..9d46decc 100644 --- a/chromiumoxide_types/src/lib.rs +++ b/chromiumoxide_types/src/lib.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::fmt; use std::fmt::Debug; use std::ops::Deref; +use std::str::FromStr; use serde::de::DeserializeOwned; use serde::{Deserialize, Serialize}; @@ -199,8 +200,7 @@ pub struct Response { /// An incoming message read from the web socket can either be a response to a /// previously submitted `Request`, identified by an identifier `id`, or an /// `Event` emitted by the server. -#[derive(Deserialize, Debug, Clone)] -#[serde(untagged)] +#[derive(Debug, Clone)] #[allow(clippy::large_enum_variant)] pub enum Message { /// A response for a request @@ -209,6 +209,31 @@ pub enum Message { Event(T), } +// #[serde(untagged)] does not work correctly with serde_json and numeric types in the schema +// This could be fixed by activating the `arbitrary_precision` feature in serde_json and using +// `serde_json::Number` instead of all usage of `f64` or else. +// For now, we use a `FromStr` trait to deserialize the message, instead of `Deserialize` with +// using `serde_json` to deserialize the message to `serde_json::Value` at first and then trying to +// deserialize both `Response` or `Event` from the `serde_json::Value`. +impl FromStr for Message +where + T: DeserializeOwned + Debug, +{ + type Err = serde_json::Error; + + fn from_str(s: &str) -> Result { + if let Ok(response) = serde_json::from_str::(s) { + Ok(Message::Response(response)) + } else { + // For now, returns an error only about the event deserialization. + // Ideally, we can return an custom error type that contains errors for both response and event. + // It seems not necessary for now because the response seems to be deserialized correctly in most cases. + let event = serde_json::from_str::(s)?; + Ok(Message::Event(event)) + } + } +} + /// A response can either contain the `Command::Response` type in the `result` /// field of the payload or an `Error` in the `error` field if the request /// resulted in an error. diff --git a/src/conn.rs b/src/conn.rs index 72c0d9ea..4b188aec 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -140,7 +140,7 @@ impl Stream for Connection { // read from the ws match ready!(pin.ws.poll_next_unpin(cx)) { Some(Ok(WsMessage::Text(text))) => { - let ready = match serde_json::from_str::>(&text) { + let ready = match text.parse::>() { Ok(msg) => { tracing::trace!("Received {:?}", msg); Ok(msg) From c3e16729e65bacfc20c7c12ece30121219bf5c44 Mon Sep 17 00:00:00 2001 From: Ryo Hirayama Date: Wed, 14 Feb 2024 23:44:21 +0900 Subject: [PATCH 6/9] Update the comment --- chromiumoxide_types/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chromiumoxide_types/src/lib.rs b/chromiumoxide_types/src/lib.rs index 9d46decc..ca6e2158 100644 --- a/chromiumoxide_types/src/lib.rs +++ b/chromiumoxide_types/src/lib.rs @@ -215,6 +215,8 @@ pub enum Message { // For now, we use a `FromStr` trait to deserialize the message, instead of `Deserialize` with // using `serde_json` to deserialize the message to `serde_json::Value` at first and then trying to // deserialize both `Response` or `Event` from the `serde_json::Value`. +// Also, this has a win in error reporting compared to `untagged` because we can return the +// detailed error instead of just "did not match any variant of untagged enum". impl FromStr for Message where T: DeserializeOwned + Debug, From 81a49a894b57afe78803813705c9fb3a17ee9d3c Mon Sep 17 00:00:00 2001 From: Ryo Hirayama Date: Thu, 15 Feb 2024 00:23:11 +0900 Subject: [PATCH 7/9] Update the comment --- chromiumoxide_types/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chromiumoxide_types/src/lib.rs b/chromiumoxide_types/src/lib.rs index ca6e2158..3f736ec3 100644 --- a/chromiumoxide_types/src/lib.rs +++ b/chromiumoxide_types/src/lib.rs @@ -209,12 +209,12 @@ pub enum Message { Event(T), } -// #[serde(untagged)] does not work correctly with serde_json and numeric types in the schema -// This could be fixed by activating the `arbitrary_precision` feature in serde_json and using -// `serde_json::Number` instead of all usage of `f64` or else. +// `#[serde(untagged)]` does not work correctly with serde_json and some numeric types in the schema, +// without activating `arbitrary_precision` feature of serde_json and using `serde_json::Number` instead of all usage of `f64` or else. // For now, we use a `FromStr` trait to deserialize the message, instead of `Deserialize` with // using `serde_json` to deserialize the message to `serde_json::Value` at first and then trying to // deserialize both `Response` or `Event` from the `serde_json::Value`. +// - https://github.com/serde-rs/serde/issues/2661 // Also, this has a win in error reporting compared to `untagged` because we can return the // detailed error instead of just "did not match any variant of untagged enum". impl FromStr for Message From cafad75e61489fb6209eda047655c8b13b8d69ea Mon Sep 17 00:00:00 2001 From: Ryo Hirayama Date: Thu, 15 Feb 2024 12:59:39 +0900 Subject: [PATCH 8/9] Update the comment --- chromiumoxide_types/src/lib.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/chromiumoxide_types/src/lib.rs b/chromiumoxide_types/src/lib.rs index 3f736ec3..4a1112d8 100644 --- a/chromiumoxide_types/src/lib.rs +++ b/chromiumoxide_types/src/lib.rs @@ -211,9 +211,10 @@ pub enum Message { // `#[serde(untagged)]` does not work correctly with serde_json and some numeric types in the schema, // without activating `arbitrary_precision` feature of serde_json and using `serde_json::Number` instead of all usage of `f64` or else. -// For now, we use a `FromStr` trait to deserialize the message, instead of `Deserialize` with -// using `serde_json` to deserialize the message to `serde_json::Value` at first and then trying to -// deserialize both `Response` or `Event` from the `serde_json::Value`. +// For now, we implement the `FromStr` trait to deserialize the message in no additional allocation manner, +// instead of `Deserialize` with using `serde_json` to deserialize the message to `serde_json::Value` at first, +// and then trying to deserialize `Response` or `Event` from the `serde_json::Value`. +// This should be fine because it never be parsed as other formats than JSON in real use case. // - https://github.com/serde-rs/serde/issues/2661 // Also, this has a win in error reporting compared to `untagged` because we can return the // detailed error instead of just "did not match any variant of untagged enum". From 8e8732986dd65d2dfe2aa26ef21c1b1466a4ae2b Mon Sep 17 00:00:00 2001 From: Ryo Hirayama Date: Thu, 15 Feb 2024 17:35:58 +0900 Subject: [PATCH 9/9] Use raw_value feature in serde_json --- chromiumoxide_types/Cargo.toml | 2 +- chromiumoxide_types/src/lib.rs | 28 ++++++++++++++++++++-------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/chromiumoxide_types/Cargo.toml b/chromiumoxide_types/Cargo.toml index 395983d2..469ec043 100644 --- a/chromiumoxide_types/Cargo.toml +++ b/chromiumoxide_types/Cargo.toml @@ -12,4 +12,4 @@ include = ["src/**/*", "LICENSE-*"] [dependencies] serde = { version = "1.0.130", features = ["derive"] } -serde_json = "1.0.72" +serde_json = { version = "1.0.72", features = ["raw_value"] } diff --git a/chromiumoxide_types/src/lib.rs b/chromiumoxide_types/src/lib.rs index 4a1112d8..081d0c52 100644 --- a/chromiumoxide_types/src/lib.rs +++ b/chromiumoxide_types/src/lib.rs @@ -6,6 +6,7 @@ use std::str::FromStr; use serde::de::DeserializeOwned; use serde::{Deserialize, Serialize}; +use serde_json::value::RawValue; pub type MethodId = Cow<'static, str>; @@ -200,6 +201,9 @@ pub struct Response { /// An incoming message read from the web socket can either be a response to a /// previously submitted `Request`, identified by an identifier `id`, or an /// `Event` emitted by the server. +/// +/// This implements both `Deserialize` and `FromStr`, where the latter is preferred in performance +/// and error reporting. #[derive(Debug, Clone)] #[allow(clippy::large_enum_variant)] pub enum Message { @@ -209,18 +213,26 @@ pub enum Message { Event(T), } -// `#[serde(untagged)]` does not work correctly with serde_json and some numeric types in the schema, +// Manually implements Deserialize because `#[serde(untagged)]` does not work correctly with serde_json and some numeric types in the schema, // without activating `arbitrary_precision` feature of serde_json and using `serde_json::Number` instead of all usage of `f64` or else. -// For now, we implement the `FromStr` trait to deserialize the message in no additional allocation manner, -// instead of `Deserialize` with using `serde_json` to deserialize the message to `serde_json::Value` at first, -// and then trying to deserialize `Response` or `Event` from the `serde_json::Value`. -// This should be fine because it never be parsed as other formats than JSON in real use case. // - https://github.com/serde-rs/serde/issues/2661 -// Also, this has a win in error reporting compared to `untagged` because we can return the -// detailed error instead of just "did not match any variant of untagged enum". +// For now, we implement the `FromStr` trait to deserialize the message, and use it in this `Deserialize` impl. +impl<'de, T> Deserialize<'de> for Message +where + T: DeserializeOwned, +{ + fn deserialize(deserializer: D) -> Result, D::Error> + where + D: serde::Deserializer<'de>, + { + let value: &RawValue = Deserialize::deserialize(deserializer)?; + value.get().parse().map_err(serde::de::Error::custom) + } +} + impl FromStr for Message where - T: DeserializeOwned + Debug, + T: DeserializeOwned, { type Err = serde_json::Error;