From 4a9243ce1a9645651cd4494835670ffa7c2bafcd Mon Sep 17 00:00:00 2001 From: James Munns Date: Mon, 8 Jul 2024 19:23:53 +0200 Subject: [PATCH] Implement file server based on pandora-web-server's module (#48) This pull request introduces basic static page hosting abilities, based on the pandora-web-server project. This is implemented as a separate service kind available for configuration. --- Cargo.toml | 10 +++ source/river/Cargo.toml | 7 ++ source/river/assets/test-config.kdl | 17 ++++ source/river/src/config/internal.rs | 12 +++ source/river/src/config/kdl/mod.rs | 118 ++++++++++++++++++++++++++-- source/river/src/config/kdl/test.rs | 37 ++++++++- source/river/src/config/toml.rs | 1 + source/river/src/files.rs | 104 ++++++++++++++++++++++++ source/river/src/main.rs | 45 ++++++++++- source/river/src/proxy/mod.rs | 40 ++-------- 10 files changed, 348 insertions(+), 43 deletions(-) create mode 100644 source/river/src/files.rs diff --git a/Cargo.toml b/Cargo.toml index abf9d7b..a8d0d55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,3 +49,13 @@ rev = "12ca93c6b187a68ff9a526b4c4e669f602244366" git = "https://github.com/memorysafety/pingora.git" rev = "12ca93c6b187a68ff9a526b4c4e669f602244366" # path = "../pingora/pingora-proxy" + +[patch.crates-io.static-files-module] +git = "https://github.com/jamesmunns/pandora-web-server.git" +rev = "f3a84be5d3be0daa65303f62aa01d0b57e7cc708" +# path = "../pandora-web-server/static-files-module" + +[patch.crates-io.pandora-module-utils] +git = "https://github.com/jamesmunns/pandora-web-server.git" +rev = "f3a84be5d3be0daa65303f62aa01d0b57e7cc708" +# path = "../pandora-web-server/pandora-module-utils" diff --git a/source/river/Cargo.toml b/source/river/Cargo.toml index 448dab6..356f89f 100644 --- a/source/river/Cargo.toml +++ b/source/river/Cargo.toml @@ -32,6 +32,13 @@ tracing = "0.1.40" kdl = "4.6.0" miette = { version = "5.10.0", features = ["fancy"] } thiserror = "1.0.61" +http = "1.0.0" + +[dependencies.static-files-module] +version = "0.2" + +[dependencies.pandora-module-utils] +version = "0.2" [dependencies.tracing-subscriber] version = "0.3.18" diff --git a/source/river/assets/test-config.kdl b/source/river/assets/test-config.kdl index 11eed28..30f9ecb 100644 --- a/source/river/assets/test-config.kdl +++ b/source/river/assets/test-config.kdl @@ -55,4 +55,21 @@ services { "91.107.223.4:80" } } + + // This is a third service, this one is a file server + Example3 { + // Same as proxy services, we support multiple listeners, and require + // at least one. + listeners { + "0.0.0.0:9000" + "0.0.0.0:9443" cert-path="./assets/test.crt" key-path="./assets/test.key" + } + // File servers have additional configuration items + file-server { + // The base path is what will be used as the "root" of the file server + // + // All files within the root will be available + base-path "." + } + } } diff --git a/source/river/src/config/internal.rs b/source/river/src/config/internal.rs index e60a66f..c7d43cc 100644 --- a/source/river/src/config/internal.rs +++ b/source/river/src/config/internal.rs @@ -21,6 +21,7 @@ pub struct Config { pub validate_configs: bool, pub threads_per_service: usize, pub basic_proxies: Vec, + pub file_servers: Vec, } impl Config { @@ -70,6 +71,16 @@ pub struct PathControl { pub(crate) upstream_response_filters: Vec>, } +// +// File Server Configuration +// +#[derive(Debug, Clone)] +pub struct FileServerConfig { + pub(crate) name: String, + pub(crate) listeners: Vec, + pub(crate) base_path: Option, +} + // // Basic Proxy Configuration // @@ -150,6 +161,7 @@ impl Default for Config { validate_configs: false, threads_per_service: 8, basic_proxies: vec![], + file_servers: vec![], } } } diff --git a/source/river/src/config/kdl/mod.rs b/source/river/src/config/kdl/mod.rs index a117652..ae74b5d 100644 --- a/source/river/src/config/kdl/mod.rs +++ b/source/river/src/config/kdl/mod.rs @@ -1,4 +1,8 @@ -use std::{collections::BTreeMap, net::SocketAddr, path::PathBuf}; +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + net::SocketAddr, + path::PathBuf, +}; use kdl::{KdlDocument, KdlEntry, KdlNode}; use miette::{bail, Diagnostic, SourceSpan}; @@ -6,8 +10,8 @@ use pingora::upstreams::peer::HttpPeer; use crate::{ config::internal::{ - Config, DiscoveryKind, HealthCheckKind, ListenerConfig, ListenerKind, PathControl, - ProxyConfig, SelectionKind, TlsConfig, UpstreamOptions, + Config, DiscoveryKind, FileServerConfig, HealthCheckKind, ListenerConfig, ListenerKind, + PathControl, ProxyConfig, SelectionKind, TlsConfig, UpstreamOptions, }, proxy::request_selector::{ null_selector, source_addr_and_uri_path_selector, uri_path_selector, RequestSelector, @@ -24,31 +28,86 @@ impl TryFrom for Config { fn try_from(value: KdlDocument) -> Result { let threads_per_service = extract_threads_per_service(&value)?; - let basic_proxies = extract_services(&value)?; + let (basic_proxies, file_servers) = extract_services(&value)?; Ok(Config { threads_per_service, basic_proxies, + file_servers, ..Config::default() }) } } /// Extract all services from the top level document -fn extract_services(doc: &KdlDocument) -> miette::Result> { +fn extract_services( + doc: &KdlDocument, +) -> miette::Result<(Vec, Vec)> { let service_node = utils::required_child_doc(doc, doc, "services")?; let services = utils::wildcard_argless_child_docs(doc, service_node)?; + let proxy_node_set = HashSet::from(["listeners", "connectors", "path-control"]); + let file_server_node_set = HashSet::from(["listeners", "file-server"]); + let mut proxies = vec![]; + let mut file_servers = vec![]; + for (name, service) in services { - proxies.push(extract_service(doc, name, service)?); + // First, visit all of the children nodes, and make sure each child + // node only appears once. This is used to detect duplicate sections + let mut fingerprint_set: HashSet<&str> = HashSet::new(); + for ch in service.nodes() { + let name = ch.name().value(); + let dupe = !fingerprint_set.insert(name); + if dupe { + return Err( + Bad::docspan(format!("Duplicate section: '{name}'!"), doc, ch.span()).into(), + ); + } + } + + // Now: what do we do with this node? + if fingerprint_set.is_subset(&proxy_node_set) { + // If the contained nodes are a strict subset of proxy node config fields, + // then treat this section as a proxy node + proxies.push(extract_service(doc, name, service)?); + } else if fingerprint_set.is_subset(&file_server_node_set) { + // If the contained nodes are a strict subset of the file server config + // fields, then treat this section as a file server node + file_servers.push(extract_file_server(doc, name, service)?); + } else { + // Otherwise, we're not sure what this node is supposed to be! + // + // Obtain the superset of ALL potential nodes, which is essentially + // our configuration grammar. + let superset: HashSet<&str> = proxy_node_set + .union(&file_server_node_set) + .cloned() + .collect(); + + // Then figure out what fields our fingerprint set contains that + // is "novel", or basically fields we don't know about + let what = fingerprint_set + .difference(&superset) + .copied() + .collect::>() + .join(", "); + + // Then inform the user about the reason for our discontent + return Err(Bad::docspan( + format!("Unknown configuration section(s): {what}"), + doc, + service.span(), + ) + .into()); + } } - if proxies.is_empty() { + if proxies.is_empty() && file_servers.is_empty() { return Err(Bad::docspan("No services defined", doc, service_node.span()).into()); } - Ok(proxies) + Ok((proxies, file_servers)) } /// Collects all the filters, where the node name must be "filter", and the rest of the args @@ -91,6 +150,49 @@ fn collect_filters( Ok(fout) } +/// Extracts a single file server from the `services` block +fn extract_file_server( + doc: &KdlDocument, + name: &str, + node: &KdlDocument, +) -> miette::Result { + // Listeners + // + let listener_node = utils::required_child_doc(doc, node, "listeners")?; + let listeners = utils::data_nodes(doc, listener_node)?; + if listeners.is_empty() { + return Err(Bad::docspan("nonzero listeners required", doc, listener_node.span()).into()); + } + let mut list_cfgs = vec![]; + for (node, name, args) in listeners { + let listener = extract_listener(doc, node, name, args)?; + list_cfgs.push(listener); + } + + // Base Path + // + let fs_node = utils::required_child_doc(doc, node, "file-server")?; + let data_nodes = utils::data_nodes(doc, fs_node)?; + let mut map = HashMap::new(); + for (node, name, args) in data_nodes { + map.insert(name, (node, args)); + } + + let base_path = if let Some((bpnode, bpargs)) = map.get("base-path") { + let val = + utils::extract_one_str_arg(doc, bpnode, "base-path", bpargs, |a| Some(a.to_string()))?; + Some(val.into()) + } else { + None + }; + + Ok(FileServerConfig { + name: name.to_string(), + listeners: list_cfgs, + base_path, + }) +} + /// Extracts a single service from the `services` block fn extract_service( doc: &KdlDocument, diff --git a/source/river/src/config/kdl/test.rs b/source/river/src/config/kdl/test.rs index da83dd8..e3a5b05 100644 --- a/source/river/src/config/kdl/test.rs +++ b/source/river/src/config/kdl/test.rs @@ -3,7 +3,9 @@ use std::{collections::BTreeMap, net::SocketAddr}; use pingora::upstreams::peer::HttpPeer; use crate::{ - config::internal::{ListenerConfig, ListenerKind, ProxyConfig, UpstreamOptions}, + config::internal::{ + FileServerConfig, ListenerConfig, ListenerKind, ProxyConfig, UpstreamOptions, + }, proxy::request_selector::uri_path_selector, }; @@ -93,11 +95,33 @@ fn load_test() { upstream_options: UpstreamOptions::default(), }, ], + file_servers: vec![FileServerConfig { + name: "Example3".into(), + listeners: vec![ + ListenerConfig { + source: crate::config::internal::ListenerKind::Tcp { + addr: "0.0.0.0:9000".into(), + tls: None, + }, + }, + ListenerConfig { + source: crate::config::internal::ListenerKind::Tcp { + addr: "0.0.0.0:9443".into(), + tls: Some(crate::config::internal::TlsConfig { + cert_path: "./assets/test.crt".into(), + key_path: "./assets/test.key".into(), + }), + }, + }, + ], + base_path: Some(".".into()), + }], }; assert_eq!(val.validate_configs, expected.validate_configs); assert_eq!(val.threads_per_service, expected.threads_per_service); assert_eq!(val.basic_proxies.len(), expected.basic_proxies.len()); + assert_eq!(val.file_servers.len(), expected.file_servers.len()); for (abp, ebp) in val.basic_proxies.iter().zip(expected.basic_proxies.iter()) { let ProxyConfig { @@ -120,6 +144,17 @@ fn load_test() { }); assert_eq!(*path_control, ebp.path_control); } + + for (afs, efs) in val.file_servers.iter().zip(expected.file_servers.iter()) { + let FileServerConfig { + name, + listeners, + base_path, + } = afs; + assert_eq!(*name, efs.name); + assert_eq!(*listeners, efs.listeners); + assert_eq!(*base_path, efs.base_path); + } } /// Empty: not allowed diff --git a/source/river/src/config/toml.rs b/source/river/src/config/toml.rs index c3b5d27..392019e 100644 --- a/source/river/src/config/toml.rs +++ b/source/river/src/config/toml.rs @@ -377,6 +377,7 @@ pub mod test { upstream_options: UpstreamOptions::default(), }, ], + file_servers: Vec::new(), }; let mut cfg = internal::Config::default(); diff --git a/source/river/src/files.rs b/source/river/src/files.rs new file mode 100644 index 0000000..7858cd5 --- /dev/null +++ b/source/river/src/files.rs @@ -0,0 +1,104 @@ +//! File Serving + +use std::ops::{Deref, DerefMut}; + +use pandora_module_utils::{pingora::SessionWrapper, RequestFilter, RequestFilterResult}; +use pingora::{server::Server, upstreams::peer::HttpPeer}; +use pingora_core::Result; +use pingora_proxy::{ProxyHttp, Session}; +use static_files_module::{StaticFilesConf, StaticFilesHandler}; + +use crate::{config::internal::FileServerConfig, populate_listners}; + +/// Create a new file serving service +pub fn river_file_server( + conf: FileServerConfig, + server: &Server, +) -> Box { + let fsconf = StaticFilesConf { + root: conf.base_path, + canonicalize_uri: true, + index_file: Vec::new().into(), + page_404: None, + precompressed: Vec::new().into(), + }; + let file_server = FileServer { + server: StaticFilesHandler::try_from(fsconf) + .expect("Creation of a Static File Service should not fail"), + }; + let mut my_proxy = + pingora_proxy::http_proxy_service_with_name(&server.configuration, file_server, &conf.name); + + populate_listners(conf.listeners, &mut my_proxy); + + Box::new(my_proxy) +} + +pub struct FileServer { + pub server: StaticFilesHandler, +} + +/// Implementation detail for integrating pingora-web-server's file server +/// +/// This wraps the [Session] provided by pingora in a way necessary for pandora. +pub struct SesWrap<'a> { + extensions: &'a mut http::Extensions, + session: &'a mut Session, +} + +#[async_trait::async_trait] +impl<'a> SessionWrapper for SesWrap<'a> { + fn extensions(&self) -> &http::Extensions { + self.extensions + } + + fn extensions_mut(&mut self) -> &mut http::Extensions { + self.extensions + } +} + +impl<'a> Deref for SesWrap<'a> { + type Target = Session; + + fn deref(&self) -> &Self::Target { + &*self.session + } +} + +impl<'a> DerefMut for SesWrap<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.session + } +} + +/// A small wrapper for delegating requests to a file server +#[async_trait::async_trait] +impl ProxyHttp for FileServer { + type CTX = http::Extensions; + + fn new_ctx(&self) -> Self::CTX { + http::Extensions::new() + } + + async fn upstream_peer( + &self, + _session: &mut Session, + _ctx: &mut Self::CTX, + ) -> Result> { + // This should never happen - we fully handle the request at the + // `request_filter` stage, so no requests should make it to the + // later `upstream_peer` stage. + Err(pingora_core::Error::new_str("Request Failed")) + } + + async fn request_filter(&self, session: &mut Session, ctx: &mut Self::CTX) -> Result { + let mut wrap = SesWrap { + extensions: ctx, + session, + }; + match self.server.request_filter(&mut wrap, &mut ()).await? { + RequestFilterResult::ResponseSent => Ok(true), + _ => Err(pingora_core::Error::new_str("Request Failed")), + } + } +} diff --git a/source/river/src/main.rs b/source/river/src/main.rs index cef22b4..2ec0bc4 100644 --- a/source/river/src/main.rs +++ b/source/river/src/main.rs @@ -1,7 +1,9 @@ mod config; +mod files; mod proxy; -use crate::proxy::river_proxy_service; +use crate::{files::river_file_server, proxy::river_proxy_service}; +use config::internal::{ListenerConfig, ListenerKind}; use pingora::{server::Server, services::Service}; fn main() { @@ -32,6 +34,12 @@ fn main() { services.push(service); } + for fs in conf.file_servers { + tracing::info!("Configuring File Server: {}", fs.name); + let service = river_file_server(fs, &my_server); + services.push(service); + } + // Now we hand it over to pingora to run forever. tracing::info!("Bootstrapping..."); my_server.bootstrap(); @@ -40,3 +48,38 @@ fn main() { tracing::info!("Starting Server..."); my_server.run_forever(); } + +pub fn populate_listners( + listeners: Vec, + service: &mut pingora_core::services::listening::Service, +) { + for list_cfg in listeners { + // NOTE: See https://github.com/cloudflare/pingora/issues/182 for tracking "paths aren't + // always UTF-8 strings". + // + // See also https://github.com/cloudflare/pingora/issues/183 for tracking "ip addrs shouldn't + // be strings" + match list_cfg.source { + ListenerKind::Tcp { + addr, + tls: Some(tls_cfg), + } => { + let cert_path = tls_cfg + .cert_path + .to_str() + .expect("cert path should be utf8"); + let key_path = tls_cfg.key_path.to_str().expect("key path should be utf8"); + service + .add_tls(&addr, cert_path, key_path) + .expect("adding TLS listener shouldn't fail"); + } + ListenerKind::Tcp { addr, tls: None } => { + service.add_tcp(&addr); + } + ListenerKind::Uds(path) => { + let path = path.to_str().unwrap(); + service.add_uds(path, None); // todo + } + } + } +} diff --git a/source/river/src/proxy/mod.rs b/source/river/src/proxy/mod.rs index 5dece5a..abb2c29 100644 --- a/source/river/src/proxy/mod.rs +++ b/source/river/src/proxy/mod.rs @@ -20,12 +20,14 @@ use pingora_load_balancing::{ use pingora_proxy::{ProxyHttp, Session}; use crate::{ - config::internal::{ListenerKind, PathControl, ProxyConfig, SelectionKind}, - proxy::request_modifiers::RequestModifyMod, + config::internal::{PathControl, ProxyConfig, SelectionKind}, + populate_listners, + proxy::{ + request_modifiers::RequestModifyMod, request_selector::RequestSelector, + response_modifiers::ResponseModifyMod, + }, }; -use self::{request_selector::RequestSelector, response_modifiers::ResponseModifyMod}; - pub mod request_modifiers; pub mod request_selector; pub mod response_modifiers; @@ -98,35 +100,7 @@ where &conf.name, ); - for list_cfg in conf.listeners { - // NOTE: See https://github.com/cloudflare/pingora/issues/182 for tracking "paths aren't - // always UTF-8 strings". - // - // See also https://github.com/cloudflare/pingora/issues/183 for tracking "ip addrs shouldn't - // be strings" - match list_cfg.source { - ListenerKind::Tcp { - addr, - tls: Some(tls_cfg), - } => { - let cert_path = tls_cfg - .cert_path - .to_str() - .expect("cert path should be utf8"); - let key_path = tls_cfg.key_path.to_str().expect("key path should be utf8"); - my_proxy - .add_tls(&addr, cert_path, key_path) - .expect("adding TLS listener shouldn't fail"); - } - ListenerKind::Tcp { addr, tls: None } => { - my_proxy.add_tcp(&addr); - } - ListenerKind::Uds(path) => { - let path = path.to_str().unwrap(); - my_proxy.add_uds(path, None); // todo - } - } - } + populate_listners(conf.listeners, &mut my_proxy); Box::new(my_proxy) }