Skip to content

Commit

Permalink
Add service using keepalived & ipvs
Browse files Browse the repository at this point in the history
  • Loading branch information
byrnedo committed Aug 23, 2024
1 parent 0c6b09a commit a3c3f8a
Show file tree
Hide file tree
Showing 28 changed files with 590 additions and 106 deletions.
30 changes: 19 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ Dns is coredns with fanout between all nodes along with serving from file.

Hosts are maintained via a CNI plugin that adds/removes the ip to the hosts file.

Pods get a hostname of `<labels.app>.<metadata.namespace>.cluster.skate.`
Pods get a hostname of `<name>.<namespace>.pod.cluster.skate.`
Services get `<name>.<namespace>.svc.cluster.skate.`

### Ingress

Expand All @@ -141,14 +142,11 @@ spec:
pathType: Prefix
backend:
service:
name: mypod.myns.cluster.skate
name: mypod.myns # routes to mypod.myns.svc.cluster.skate
port:
number: 80
```
Service resources are ignored and it's implicit that a pod has a service with
url: `<labels.name>.<metadata.namespace>.cluster.skate`

Currently only Prefix pathType is supported.
Supported annotations:
Expand Down Expand Up @@ -332,10 +330,20 @@ sudo apt-get install -y gcc make libssl-dev pkg-config
- [ ] Get pod config from store and not podman


### DNS Improvements
### Service Improvements

#### Pre work
1. Deploy keepalived on all nodes
2. Apply static ips to pods.

#### Deploying service

1. Modify keepalived.conf on all nodes to have service ips
2. Assign ip to keepalived service.
3. Create a dns entry for <name>.<ns>.svc.cluster.skate that points to keepalived

Or

1. Mod coredns to fanout to all nodes and wait for all responses, and round robin the responses.
2. Make these dns records available as <name>.<namespace>.pod.cluster.skate
3. Mod ingress to apply Service resources, making them available as <name>.<namespace>.svc.cluster.skate, proxying to the
pod domains.
4. Make nginx proxy to next healthy upstream upon connection failure.
1. Assign ip to keepalived service.
2. Cron that queries dns for all services every n seconds and updates keepalived.conf and reloads it.
3. Create a dns entry for <name>.<ns>.svc.cluster.skate that points to keepalived
2 changes: 1 addition & 1 deletion hack/test-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: nginx-deployment
name: nginx
namespace: foo
spec:
replicas: 3
Expand Down
12 changes: 12 additions & 0 deletions hack/test-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Test manifest: a Service for the nginx test deployment (hack/test-deployment.yaml).
apiVersion: v1
kind: Service
metadata:
  name: nginx
  namespace: foo
spec:
  # Selects pods carrying this label — presumably set by the nginx test
  # deployment; verify against the deployment's pod template labels.
  selector:
    app.kubernetes.io/name: nginx
  ports:
    - protocol: TCP
      port: 80        # port the service listens on
      targetPort: 80  # port on the selected pods
2 changes: 1 addition & 1 deletion images/coredns/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,4 @@ require (
sigs.k8s.io/yaml v1.3.0 // indirect
)

replace github.com/networkservicemesh/fanout => github.com/skateco/fanout v0.0.0-20240821130608-7538dbcf5f9e
replace github.com/networkservicemesh/fanout => github.com/skateco/fanout v0.0.0-20240821133121-12157fa01a4d
6 changes: 2 additions & 4 deletions images/coredns/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -547,8 +547,6 @@ github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxzi
github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=
github.com/networkservicemesh/fanout v1.9.2 h1:KF2PsFJSNUTvFXc1hMdqCOQ9lRqGN4V8lVg8fwa5HhA=
github.com/networkservicemesh/fanout v1.9.2/go.mod h1:EM8dDilQja7KTATYkS6En1OIdxyy19/n0ivm+ft6tDs=
github.com/nrdcg/auroradns v1.0.0/go.mod h1:6JPXKzIRzZzMqtTDgueIhTi6rFf1QvYE/HzqidhOhjw=
github.com/nrdcg/goinwx v0.6.1/go.mod h1:XPiut7enlbEdntAqalBIqcYcTEVhpv/dKWgDCX2SwKQ=
github.com/nrdcg/namesilo v0.2.1/go.mod h1:lwMvfQTyYq+BbjJd30ylEG4GPSS6PII0Tia4rRpRiyw=
Expand Down Expand Up @@ -682,8 +680,8 @@ github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrf
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/skateco/fanout v0.0.0-20240821130608-7538dbcf5f9e h1:wKU46K4+dPwlK+8pV7POCAbBri+VKKRyC0IhKGmSHQk=
github.com/skateco/fanout v0.0.0-20240821130608-7538dbcf5f9e/go.mod h1:EM8dDilQja7KTATYkS6En1OIdxyy19/n0ivm+ft6tDs=
github.com/skateco/fanout v0.0.0-20240821133121-12157fa01a4d h1:RynTo7/odyJf0omOAki6QoUA0izPazadCa1l91udW6E=
github.com/skateco/fanout v0.0.0-20240821133121-12157fa01a4d/go.mod h1:EM8dDilQja7KTATYkS6En1OIdxyy19/n0ivm+ft6tDs=
github.com/skratchdot/open-golang v0.0.0-20160302144031-75fb7ed4208c/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
Expand Down
2 changes: 1 addition & 1 deletion images/nginx-ingress/service.conf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
{{/inline}}

{{#*inline "proxyPassLocation"}}
set $upstream http://{{backend.service.name}}.cluster.skate:{{backend.service.port.number}};
set $upstream http://{{backend.service.name}}.svc.cluster.skate:{{backend.service.port.number}};
proxy_pass $upstream;
{{/inline}}

Expand Down
10 changes: 9 additions & 1 deletion manifests/coredns.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ spec:
hosts /var/lib/skate/dns/addnhosts
}
cluster.skate:53 {
svc.cluster.skate:53 {
bind lo
hosts /var/lib/skate/dns/addnhosts
}
pod.cluster.skate:53 {
bind lo
Expand Down
37 changes: 37 additions & 0 deletions src/config_cmd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use anyhow::anyhow;
use clap::{Args, Subcommand};
use crate::skate::ConfigFileArgs;

/// Top-level arguments for the `skate config` subcommand.
#[derive(Debug, Args)]
pub struct ConfigArgs{
    /// Shared config-file flags (path to the skate config file, etc.).
    #[command(flatten)]
    config: ConfigFileArgs,
    /// Which config action to run.
    #[command(subcommand)]
    command: ConfigCommands,
}

/// Arguments for `skate config use-context`.
#[derive(Debug, Args)]
pub struct UseContextArgs{
    /// Name of the cluster context to switch to.
    pub context: String

}

/// Actions available under `skate config`.
#[derive(Debug, Subcommand)]
pub enum ConfigCommands {
    /// Switch the current context (analogous to `kubectl config use-context`).
    UseContext(UseContextArgs),
}

/// Entry point for `skate config` subcommands.
///
/// For `use-context`: loads the skate config, validates that a cluster with
/// the requested name exists, sets it as the current context and persists
/// the config file back to disk.
///
/// # Errors
/// Returns an error if the config cannot be loaded or persisted, or if no
/// cluster matches the requested context name.
pub fn config(args: ConfigArgs) -> Result<(), Box<dyn std::error::Error>> {
    match args.command {
        ConfigCommands::UseContext(use_context_args) => {
            // Propagate load failures instead of panicking (`expect`) so the
            // CLI reports the error cleanly — the function already returns Result.
            let mut config = crate::config::Config::load(Some(args.config.skateconfig.clone()))?;
            // The requested context must name an existing cluster.
            config.clusters.iter().any(|c| c.name == use_context_args.context)
                .then_some(())
                .ok_or_else(|| anyhow!("no context exists with the name {}", use_context_args.context))?;
            config.current_context = Some(use_context_args.context.clone());
            config.persist(Some(args.config.skateconfig))?;
            println!("Switched to context \"{}\"", use_context_args.context.replace("\"", ""));
        }
    }
    Ok(())
}
13 changes: 12 additions & 1 deletion src/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,13 @@ async fn create_node(args: CreateNodeArgs) -> Result<(), Box<dyn Error>> {
let (all_conns, _) = cluster_connections(&cluster).await;
let all_conns = &all_conns.unwrap_or(SshClients { clients: vec!() });

let skate_dirs = [
"ingress",
"ingress/letsencrypt_storage",
"dns",
"keepalived"].map(|s| format!("/var/lib/skate/{}", s));

_ = conn.execute("sudo mkdir -p /var/lib/skate/ingress /var/lib/skate/ingress/letsencrypt_storage /var/lib/skate/dns").await?;
_ = conn.execute(&format!("sudo mkdir -p {}", skate_dirs.join(" "))).await?;
// _ = conn.execute("sudo podman rm -fa").await;

setup_networking(&conn, &all_conns, &cluster, &node).await?;
Expand Down Expand Up @@ -291,6 +296,12 @@ async fn install_cluster_manifests(args: &ConfigFileArgs, config: &Cluster) -> R
async fn setup_networking(conn: &SshClient, all_conns: &SshClients, cluster_conf: &Cluster, node: &Node) -> Result<(), Box<dyn Error>> {
let network_backend = "netavark";

conn.execute("sudo apt-get install -y keepalived").await?;
conn.execute(&format!("sudo bash -c -eu 'echo {}| base64 --decode > /etc/keepalived/keepalived.conf'", general_purpose::STANDARD.encode(include_str!("./resources/keepalived.conf")))).await?;
conn.execute("sudo systemctl enable keepalived").await?;
conn.execute("sudo systemctl start keepalived").await?;


if conn.execute("test -f /etc/containers/containers.conf").await.is_err() {
let cmd = "sudo cp /usr/share/containers/containers.conf /etc/containers/containers.conf";
conn.execute(cmd).await?;
Expand Down
2 changes: 2 additions & 0 deletions src/delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub enum DeleteCommands {
Secret(DeleteResourceArgs),
Deployment(DeleteResourceArgs),
Daemonset(DeleteResourceArgs),
Service(DeleteResourceArgs),
}

#[derive(Debug, Args)]
Expand All @@ -45,6 +46,7 @@ pub async fn delete(args: DeleteArgs) -> Result<(), Box<dyn Error>> {
DeleteCommands::Ingress(args) => delete_resource(ResourceType::Ingress, args).await?,
DeleteCommands::Cronjob(args) => delete_resource(ResourceType::CronJob, args).await?,
DeleteCommands::Secret(args) => delete_resource(ResourceType::Secret, args).await?,
DeleteCommands::Service(args) => delete_resource(ResourceType::Service, args).await?,
}
Ok(())
}
Expand Down
130 changes: 122 additions & 8 deletions src/executor.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
use std::error::Error;
use std::fs::File;
use std::io::{Write};
use std::process;
use std::io::{read_to_string, BufRead, Read, Seek, SeekFrom, Write};
use std::net::{IpAddr, Ipv4Addr};
use std::{fs, process};
use std::process::Stdio;

use std::str::FromStr;
use anyhow::anyhow;
use handlebars::Handlebars;
use itertools::Itertools;
use k8s_openapi::api::apps::v1::{DaemonSet, Deployment};
use k8s_openapi::api::batch::v1::CronJob;
use k8s_openapi::api::core::v1::{Pod, Secret};
use k8s_openapi::api::core::v1::{Pod, Secret, Service};
use k8s_openapi::api::networking::v1::Ingress;
use log::info;
use serde_json::{json, Value};

use crate::cron::cron_to_systemd;
use crate::filestore::FileStore;
use crate::skate::{exec_cmd, SupportedResources};
use crate::util::{hash_string, metadata_name};
use crate::skatelet::dns;
use crate::util::{hash_string, lock_file, metadata_name};

pub trait Executor {
fn apply(&self, manifest: &str) -> Result<(), Box<dyn Error>>;
Expand Down Expand Up @@ -128,10 +131,11 @@ impl DefaultExecutor {
let mut file = std::fs::OpenOptions::new().write(true).create(true).truncate(true).open(&format!("/etc/systemd/system/skate-cronjob-{}.timer", &ns_name.to_string()))?;
file.write_all(output.as_bytes())?;

let unit_name = format!("skate-cronjob-{}", &ns_name.to_string());

// systemctl daemon-reload
exec_cmd("systemctl", &["daemon-reload"])?;
exec_cmd("systemctl", &["enable", "--now", &format!("skate-cronjob-{}", &ns_name.to_string())])?;
exec_cmd("systemctl", &["enable", "--now", &unit_name])?;
exec_cmd("systemctl", &["reset-failed", &unit_name]);

Ok(())
}
Expand Down Expand Up @@ -287,6 +291,107 @@ impl DefaultExecutor {
Ok(())
}

/// Applies a Service resource on this node:
/// 1. persists the manifest (and its hash label, if present) to the store,
/// 2. allocates the next free VIP from 10.30.0.0/16 under a file lock,
/// 3. renders and enables a `skate-ipvsmon-<name>` systemd service + timer,
/// 4. registers `<name>.svc.cluster.skate` in DNS pointing at the VIP.
///
/// # Errors
/// Fails if serialization, store writes, ip allocation, unit rendering,
/// systemctl calls, or the DNS update fail.
fn apply_service(&self, service: Service) -> Result<(), Box<dyn Error>> {
    let manifest_string = serde_yaml::to_string(&service)
        .map_err(|e| anyhow!(e).context("failed to serialize manifest to yaml"))?;
    let name = &metadata_name(&service).to_string();

    // manifest goes into store
    let yaml_path = self.store.write_file("service", name, "manifest.yaml", manifest_string.as_bytes())?;

    // Avoid allocating a default String when the label is present
    // (the original `unwrap_or(&"".to_string())` allocated unconditionally).
    let hash = service.metadata.labels.as_ref()
        .and_then(|m| m.get("skate.io/hash"))
        .map(String::to_owned)
        .unwrap_or_default();

    if !hash.is_empty() {
        self.store.write_file("service", name, "hash", hash.as_bytes())?;
    }

    // Render the ipvs-monitor systemd unit for this service.
    let mut handlebars = Handlebars::new();
    handlebars.set_strict_mode(true);
    ////////////////////////////////////////////////////
    // template skate-ipvsmon.service to /etc/systemd/system/skate-ipvsmon-<name>.service
    ////////////////////////////////////////////////////

    handlebars.register_template_string("unit", include_str!("./resources/skate-ipvsmon.service"))
        .map_err(|e| anyhow!(e).context("failed to load service template file"))?;

    // cidr is 10.30.0.0/16 — we just keep incrementing from the last
    // allocated address; freed ips are not reused.
    let service_subnet_start = "10.30.0.0";

    // File lock serialises allocation so concurrent applies can't hand out
    // the same ip.
    let ip = lock_file("/var/lib/skate/keepalived/service-ips.lock", Box::new(move || {
        info!("reading ip file");

        let last_ip = fs::read_to_string("/var/lib/skate/keepalived/service-ips").unwrap_or_default();
        info!("converting {} to Ipv4Addr", last_ip);
        // Fall back to the subnet start when the file is missing or corrupt.
        let last_ip = Ipv4Addr::from_str(&last_ip)
            .unwrap_or_else(|_| Ipv4Addr::from_str(service_subnet_start).unwrap());

        info!("last ip: {}", last_ip);

        let mut octets = last_ip.octets();

        // Increment the host portion, carrying into the third octet.
        if octets[3] == 255 {
            if octets[2] == 255 {
                return Err(anyhow!("no more ips available on subnet {}/16", service_subnet_start).into());
            }
            octets[2] += 1;
            octets[3] = 0;
        } else {
            octets[3] += 1;
        }

        let ip = Ipv4Addr::from(octets);

        // Record the allocation so the next apply continues from here.
        fs::write("/var/lib/skate/keepalived/service-ips", ip.to_string())?;

        Ok(ip.to_string())
    }))?;

    let json: Value = json!({
        "svc_name": name,
        "ip": ip,
        "yaml_path": yaml_path,
    });

    let file = std::fs::OpenOptions::new().write(true).create(true).truncate(true)
        .open(format!("/etc/systemd/system/skate-ipvsmon-{}.service", name))?;
    handlebars.render_to_write("unit", &json, file)?;

    handlebars.register_template_string("timer", include_str!("./resources/skate-ipvsmon.timer"))
        .map_err(|e| anyhow!(e).context("failed to load timer template file"))?;
    let json: Value = json!({
        "svc_name": name,
    });
    let file = std::fs::OpenOptions::new().write(true).create(true).truncate(true)
        .open(format!("/etc/systemd/system/skate-ipvsmon-{}.timer", name))?;
    handlebars.render_to_write("timer", &json, file)?;
    let unit_name = format!("skate-ipvsmon-{}", name);

    exec_cmd("systemctl", &["daemon-reload"])?;
    exec_cmd("systemctl", &["enable", "--now", &unit_name])?;
    exec_cmd("systemctl", &["reset-failed", &unit_name])?;

    // Point <name>.svc.cluster.skate at the allocated VIP.
    let domain = format!("{}.svc.cluster.skate", name);
    dns::add_misc_host(ip, domain.clone(), domain)?;

    Ok(())
}

/// Removes a Service: stops/disables its skate-ipvsmon unit, deletes the
/// unit, timer and keepalived config files, reloads systemd and removes the
/// service from the store.
///
/// Unit stop/disable/file removal is best-effort (the unit may not exist);
/// daemon-reload, reset-failed and store removal propagate errors, matching
/// the original's `?` usage.
fn remove_service(&self, service: Service) -> Result<(), Box<dyn Error>> {
    let ns_name = metadata_name(&service).to_string();
    // Build the unit name once instead of re-formatting it per call.
    let unit_name = format!("skate-ipvsmon-{}", ns_name);

    // Best-effort teardown: ignore failures for units/files that are absent.
    let _ = exec_cmd("systemctl", &["stop", &unit_name]);
    let _ = exec_cmd("systemctl", &["disable", &unit_name]);
    let _ = exec_cmd("rm", &[&format!("/etc/systemd/system/{}.service", unit_name)]);
    let _ = exec_cmd("rm", &[&format!("/etc/systemd/system/{}.timer", unit_name)]);
    let _ = exec_cmd("rm", &[&format!("/var/lib/skate/keepalived/{}.conf", ns_name)]);

    // These propagate errors — the original's `let _ = ...?;` already did,
    // the redundant `let _ =` binding is just dropped.
    exec_cmd("systemctl", &["daemon-reload"])?;
    exec_cmd("systemctl", &["reset-failed"])?;

    self.store.remove_object("service", &ns_name)?;

    Ok(())
}


fn remove_deployment(&self, deployment: Deployment, grace_period: Option<usize>) -> Result<(), Box<dyn Error>> {
// find all pod ids for the deployment
let name = deployment.metadata.name.unwrap();
Expand Down Expand Up @@ -369,7 +474,10 @@ impl Executor for DefaultExecutor {
// just to check
let object: SupportedResources = serde_yaml::from_str(manifest).expect("failed to deserialize manifest");
match object {
SupportedResources::Pod(_) | SupportedResources::Secret(_) | SupportedResources::Deployment(_) | SupportedResources::DaemonSet(_) => {
SupportedResources::Pod(_)
| SupportedResources::Secret(_)
| SupportedResources::Deployment(_)
| SupportedResources::DaemonSet(_) => {
self.apply_play(object)
}
SupportedResources::Ingress(ingress) => {
Expand All @@ -378,6 +486,9 @@ impl Executor for DefaultExecutor {
SupportedResources::CronJob(cron) => {
self.apply_cronjob(cron)
}
SupportedResources::Service(service) => {
self.apply_service(service)
}
}
}

Expand All @@ -403,6 +514,9 @@ impl Executor for DefaultExecutor {
SupportedResources::Secret(secret) => {
self.remove_secret(secret)
}
SupportedResources::Service(service) => {
self.remove_service(service)
}
}
}
}
Loading

0 comments on commit a3c3f8a

Please sign in to comment.