From 7cf20515a45da30ab0ea45e3b8a8d87ac201b0d1 Mon Sep 17 00:00:00 2001 From: rumblefrog Date: Mon, 15 Nov 2021 13:31:10 -0500 Subject: [PATCH] feat - discrete cookie return --- Cargo.lock | 191 +++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 5 +- lib/index.ts | 21 +++--- package.json | 2 +- src/builder.rs | 6 ++ src/client.rs | 38 +++++++++- src/lib.rs | 1 + src/time_jar.rs | 56 ++++++++++++++ 8 files changed, 305 insertions(+), 15 deletions(-) create mode 100644 src/time_jar.rs diff --git a/Cargo.lock b/Cargo.lock index af9f0ec..6f6af7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -63,6 +63,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "base-x" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4521f3e3d031370679b3b140beb36dfe4801b09ac77e30c61941f97df3ef28b" + [[package]] name = "base64" version = "0.13.0" @@ -120,6 +126,39 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "const_fn" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92cfa0fd5690b3cf8c1ef2cabbd9b7ef22fa53cf5e1f92b05103f6d5d1cf6e7" + +[[package]] +name = "cookie" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a5d7b21829bc7b4bf4754a978a241ae54ea55a40f92bb20216e54096f4b951" +dependencies = [ + "percent-encoding", + "time", + "version_check", +] + +[[package]] +name = "cookie_store" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3818dfca4b0cb5211a659bbcbb94225b7127407b2b135e650d717bfb78ab10d3" +dependencies = [ + "cookie", + "idna", + "log", + "publicsuffix", + "serde", + "serde_json", + "time", + "url", +] + 
[[package]] name = "core-foundation" version = "0.9.1" @@ -151,6 +190,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "697c714f50560202b1f4e2e09cd50a421881c83e9025db75d15f276616f04f40" +[[package]] +name = "discard" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0" + [[package]] name = "encoding_rs" version = "0.8.28" @@ -727,6 +772,16 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "publicsuffix" +version = "1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95b4ce31ff0a27d93c8de1849cf58162283752f065a90d508f1105fa6c9a213f" +dependencies = [ + "idna", + "url", +] + [[package]] name = "quote" version = "1.0.9" @@ -819,6 +874,8 @@ dependencies = [ "async-compression", "base64", "bytes", + "cookie", + "cookie_store", "encoding_rs", "futures-core", "futures-util", @@ -836,6 +893,7 @@ dependencies = [ "pin-project-lite", "serde", "serde_urlencoded", + "time", "tokio", "tokio-native-tls", "tokio-util", @@ -848,7 +906,7 @@ dependencies = [ [[package]] name = "rust-fetch" -version = "0.6.18" +version = "0.6.19" dependencies = [ "bytes", "env_logger", @@ -860,6 +918,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + [[package]] name = "ryu" version = "1.0.5" @@ -919,6 +986,20 @@ name = "serde" version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1056a0db1978e9dbf0f6e4fca677f6f9143dc1c19de346f22cac23e422196834" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.128" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"13af2fbb8b60a8950d6c72a56d2095c28870367cc8e10c55e9745bac4995a2c4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "serde_json" @@ -943,6 +1024,12 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d" + [[package]] name = "slab" version = "0.4.4" @@ -965,12 +1052,70 @@ dependencies = [ "winapi", ] +[[package]] +name = "standback" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e113fb6f3de07a243d434a56ec6f186dfd51cb08448239fe7bcae73f87ff28ff" +dependencies = [ + "version_check", +] + [[package]] name = "static_vcruntime" version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a88c15d6fe7210ea80c578b53855615fdea0188e1630b6d9c377e1b2f2c098fa" +[[package]] +name = "stdweb" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5" +dependencies = [ + "discard", + "rustc_version", + "stdweb-derive", + "stdweb-internal-macros", + "stdweb-internal-runtime", + "wasm-bindgen", +] + +[[package]] +name = "stdweb-derive" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "serde_derive", + "syn", +] + +[[package]] +name = "stdweb-internal-macros" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11" +dependencies = [ + "base-x", + "proc-macro2", + "quote", + "serde", + "serde_derive", + "serde_json", + "sha1", + "syn", +] + +[[package]] +name = "stdweb-internal-runtime" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0" + [[package]] name = "syn" version = "1.0.75" @@ -1005,6 +1150,44 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "time" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4752a97f8eebd6854ff91f1c1824cd6160626ac4bd44287f7f4ea2035a02a242" +dependencies = [ + "const_fn", + "libc", + "standback", + "stdweb", + "time-macros", + "version_check", + "winapi", +] + +[[package]] +name = "time-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957e9c6e26f12cb6d0dd7fc776bb67a706312e7299aed74c8dd5b17ebb27e2f1" +dependencies = [ + "proc-macro-hack", + "time-macros-impl", +] + +[[package]] +name = "time-macros-impl" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3c141a1b43194f3f56a1411225df8646c55781d5f26db825b3d98507eb482f" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "standback", + "syn", +] + [[package]] name = "tinyvec" version = "1.3.1" @@ -1131,6 +1314,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + [[package]] name = "want" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index bdab95e..ff8e29c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rust-fetch" -version = "0.6.18" +version = "0.6.19" authors = ["rumblefrog "] edition = "2018" @@ -22,7 +22,8 @@ features = ["rt-multi-thread"] git = "https://github.com/TextsHQ/reqwest" branch = "master" default-features = false -features = ["native-tls-alpn", 
"gzip", "brotli"] +# Cookies is used for the time_jar in order to invoke pre-redirect headers sets in reqwest. +features = ["native-tls-alpn", "gzip", "brotli", "cookies"] [dependencies.neon] git = "https://github.com/neon-bindings/neon" diff --git a/lib/index.ts b/lib/index.ts index 60fc6c9..d5763ce 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -121,6 +121,13 @@ export interface Response { * Each header may have more than one value in the value array. */ headers: Record; + + /** + * New cookies present since request time. + * + * URL => cookies[] + */ + newCookies: Record; } export class Client { @@ -179,16 +186,12 @@ export class Client { args.body = ( args.body).getBuffer(); } - const res = await requestPromise.call(this.#client, url, args); + const res: Response = await requestPromise.call(this.#client, url, args); - for (const [k, v] of Object.entries(res.headers)) { - if (args.cookieJar && k === 'set-cookie') { - if (Array.isArray(v)) { - for (const item of v as string[]) { - args.cookieJar.setCookieSync(item, url); - } - } else { - args.cookieJar.setCookieSync(v as string, url); + if (args.cookieJar) { + for (const [k, v] of Object.entries(res.newCookies)) { + for (const item of v) { + args.cookieJar.setCookieSync(item, k); } } } diff --git a/package.json b/package.json index 4e9dd2b..36ab18a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "rust-fetch", - "version": "0.6.18", + "version": "0.6.19", "description": "Rust HTTP wrapper for JS", "main": "dist/index.js", "files": [ diff --git a/src/builder.rs b/src/builder.rs index 0cf91ce..2f3b1d5 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -11,6 +11,7 @@ use reqwest::redirect::Policy; use reqwest::ClientBuilder; use crate::client::Client; +use crate::time_jar::TimeJar; pub struct Builder(Option); @@ -177,6 +178,10 @@ impl Builder { cb.client = cb.client.http2_initial_stream_window_size(1024 * 256 * 24); + let time_jar = std::sync::Arc::new(TimeJar::default()); + + cb.client = 
cb.client.cookie_provider(time_jar.clone()); + let client = cb.client.build().unwrap(); Ok(JsBox::new( @@ -184,6 +189,7 @@ impl Builder { Client { runtime: Runtime::new().unwrap(), client, + time_jar, }, )) } diff --git a/src/client.rs b/src/client.rs index 402509a..595a985 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,7 +1,8 @@ use std::collections::HashMap; use std::convert::TryInto; use std::str::FromStr; -use std::time::Duration; +use std::sync::Arc; +use std::time::{Duration, Instant}; use bytes::Bytes; @@ -15,12 +16,16 @@ use tokio::runtime::Runtime; use reqwest::header::HeaderMap; use reqwest::{Body, Client as ReqwestClient, Error, Method, Response}; +use crate::time_jar::{TimeJar, NewCookies}; + pub const RETRY_DURATION: Duration = Duration::from_millis(200); pub struct Client { pub(crate) runtime: Runtime, pub(crate) client: ReqwestClient, + + pub(crate) time_jar: Arc, } #[derive(Debug)] @@ -62,6 +67,8 @@ pub struct CallbackPayload { content_length: Option, data: DataType, + + new_cookies: Vec, } impl Finalize for Client {} @@ -151,6 +158,7 @@ impl Client { pub async fn map_response( res: Result, response_type: ResponseType, + new_cookies: Vec, ) -> Result { match res { Ok(res) => { @@ -190,6 +198,7 @@ impl Client { headers, content_length, data, + new_cookies, }) } @@ -231,6 +240,24 @@ impl Client { h }; + let new_cookies = { + let h = JsObject::new(cx); + + for (k, v) in payload.new_cookies { + let val = JsArray::new(cx, v.len() as u32); + + for (i, entry) in v.iter().enumerate() { + let z = cx.string(entry.to_string()); + + val.set(cx, i as u32, z)?; + } + + h.set(cx, k.as_ref(), val)?; + } + + h + }; + if let Some(content_length) = payload.content_length { let val = cx.number(content_length); @@ -265,6 +292,7 @@ impl Client { obj.set(cx, "statusCode", status)?; obj.set(cx, "httpVersion", http_version)?; obj.set(cx, "headers", headers)?; + obj.set(cx, "newCookies", new_cookies)?; Ok(obj) } @@ -356,7 +384,11 @@ impl Client { let queue = 
cx.channel(); + let time_jar = this.time_jar.clone(); + this.runtime.spawn(async move { + let request_time = Instant::now(); + let res = FutureRetry::new( || builder.try_clone().unwrap().send(), Attempter::new(method, attempts), @@ -371,7 +403,9 @@ impl Client { r }); - let res = Self::map_response(res, response_type).await; + let new_cookies = time_jar.cookies_since(request_time); + + let res = Self::map_response(res, response_type, new_cookies).await; queue.send(|mut cx| { let cb = callback.into_inner(&mut cx); diff --git a/src/lib.rs b/src/lib.rs index b5b2033..349c8f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ use neon::prelude::*; mod builder; mod client; +mod time_jar; use builder::Builder; use client::Client; diff --git a/src/time_jar.rs b/src/time_jar.rs new file mode 100644 index 0000000..937f0f2 --- /dev/null +++ b/src/time_jar.rs @@ -0,0 +1,56 @@ +use std::time::Instant; +use std::ops::Bound::{Excluded, Unbounded}; +use std::sync::RwLock; +use std::collections::BTreeMap; + +use reqwest::Url; +use reqwest::header::HeaderValue; +use reqwest::cookie::CookieStore; + +pub type NewCookies = (String, Vec<String>); + +/// Time based jar. +/// +/// The motivation is that we cannot set a dedicated jar for each request. +/// And recreating the client for each request seems wasteful. +/// +/// Therefore we need to track cookies by time to fetch cookies since request time. +/// +/// This is rather crude, but it works, and can be improved later when better support lands in reqwest. 
+/// Namely: https://github.com/seanmonstar/reqwest/issues/353 +pub struct TimeJar(RwLock<BTreeMap<Instant, NewCookies>>); + +impl TimeJar { + pub fn cookies_since(&self, time: Instant) -> Vec<NewCookies> { + let mut cookies = Vec::new(); + let jar = self.0.read().unwrap(); + + for (_t, v) in jar.range((Excluded(time), Unbounded)) { + cookies.push(v.to_owned()) + } + + cookies + } + +} + +impl Default for TimeJar { + fn default() -> Self { + TimeJar(RwLock::new(BTreeMap::new())) + } +} + +impl CookieStore for TimeJar { + fn set_cookies(&self, cookie_headers: &mut dyn Iterator<Item = &HeaderValue>, url: &Url) { + let mut jar = self.0.write().unwrap(); + + let cookies = cookie_headers.map(|h| h.to_str().unwrap().to_owned()).collect(); + + jar.insert(Instant::now(), (url.origin().ascii_serialization(), cookies)); + } + + // Time jar is not designed to serve cookies for requests, that is the job of the JS caller. + fn cookies(&self, _url: &Url) -> Option<HeaderValue> { + None + } +}