Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make krun-server wait for child processes #34

Merged
merged 1 commit into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
297 changes: 272 additions & 25 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ rustix = { version = "0.38.34", default-features = false }
serde = { version = "1.0.203", default-features = false }
serde_json = { version = "1.0.117", default-features = false }
tempfile = { version = "3.10.1", default-features = false }
tokio = { version = "1.38.0", default-features = false }
tokio-stream = { version = "0.1.15", default-features = false }
utils = { path = "crates/utils", default-features = false }
2 changes: 1 addition & 1 deletion crates/krun-guest/src/net.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::fs;
use std::os::unix::process::ExitStatusExt;
use std::os::unix::process::ExitStatusExt as _;
use std::process::Command;

use anyhow::{anyhow, Context, Result};
Expand Down
3 changes: 1 addition & 2 deletions crates/krun-guest/src/pulse.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use std::collections::HashMap;
use std::env;
use std::fs;
use std::path::Path;
use std::process::{Command, Stdio};
use std::{env, fs};

use anyhow::{Context, Result};
use utils::env::find_in_path;
Expand Down
3 changes: 2 additions & 1 deletion crates/krun-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ anyhow = { workspace = true, features = ["std"] }
bpaf = { workspace = true, features = [] }
env_logger = { workspace = true, features = ["auto-color", "humantime", "unstable-kv"] }
log = { workspace = true, features = ["kv"] }
nix = { workspace = true, features = ["socket"] }
serde = { workspace = true, features = [] }
serde_json = { workspace = true, features = ["std"] }
tokio = { workspace = true, features = ["io-util", "macros", "net", "process", "rt-multi-thread", "sync"] }
tokio-stream = { workspace = true, features = ["net", "sync"] }
utils = { workspace = true, features = [] }

[features]
Expand Down
102 changes: 82 additions & 20 deletions crates/krun-server/src/bin/krun-server.rs
Original file line number Diff line number Diff line change
@@ -1,32 +1,94 @@
use std::net::TcpListener;
use std::os::fd::AsRawFd;
use std::panic;
use std::process::Command;
use std::os::unix::process::ExitStatusExt as _;

use anyhow::{Context, Result};
use anyhow::Result;
use krun_server::cli_options::options;
use krun_server::server::start_server;
use nix::sys::socket::{shutdown, Shutdown};
use krun_server::server::{start_server, State};
use log::error;
use tokio::net::TcpListener;
use tokio::process::Command;
use tokio::sync::watch;
use tokio_stream::wrappers::WatchStream;
use tokio_stream::StreamExt as _;

fn main() -> Result<()> {
#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();

let options = options().run();

let listener = TcpListener::bind(format!("0.0.0.0:{}", options.server_port))?;
let listener_fd = listener.as_raw_fd();
let listener = TcpListener::bind(format!("0.0.0.0:{}", options.server_port)).await?;
let (state_tx, state_rx) = watch::channel(State {
connection_idle: true,
child_processes: 0,
});

let server_thread = start_server(listener);

Command::new(&options.command)
let server_handle = tokio::spawn(start_server(listener, state_tx));
tokio::pin!(server_handle);
let command_status = Command::new(&options.command)
.args(options.command_args)
.status()
.with_context(|| format!("Failed to execute command {:?}", options.command))?;
.status();
tokio::pin!(command_status);
let mut state_rx = WatchStream::new(state_rx);

shutdown(listener_fd, Shutdown::Both)?;
if let Err(err) = server_thread.join() {
panic::resume_unwind(err);
}
let mut server_died = false;
let mut command_exited = false;

Ok(())
loop {
tokio::select! {
res = &mut server_handle, if !server_died => {
// If an error is received here, accepting connections from the
// TCP listener failed due to non-transient errors and the
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: The code comment here is inaccurate: at this time we log and ignore all connection errors, so no accept error ever reaches this point. We should continue on transient errors but bail out on non-transient errors.

https://book.async.rs/patterns/accept-loop

// server is giving up and shutting down.
//
// Errors encountered when handling individual connections do
// not bubble up to this point.
if let Err(err) = res {
error!(err:% = err; "server task failed");
server_died = true;
}
},
res = &mut command_status, if !command_exited => {
match res {
Ok(status) => {
if !status.success() {
if let Some(code) = status.code() {
eprintln!(
"{:?} process exited with status code: {code}",
options.command
);
} else {
eprintln!(
"{:?} process terminated by signal: {}",
options.command,
status
.signal()
.expect("either one of status code or signal should be set")
);
}
}
},
Err(err) => {
eprintln!(
"Failed to execute {:?} as child process: {err}",
options.command
);
},
}
command_exited = true;
},
Some(state) = state_rx.next(), if command_exited => {
if state.connection_idle && state.child_processes == 0 {
// Server is idle (not currently handling an accepted
// incoming connection) and no more child processes.
// We're done.
return Ok(());
}
println!(
"Waiting for {} other commands launched through this krun server to exit...",
state.child_processes
);
println!("Press Ctrl+C to force quit");
},
}
}
}
160 changes: 127 additions & 33 deletions crates/krun-server/src/server.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,131 @@
use std::collections::HashMap;
use std::env;
use std::io::{BufRead, BufReader, Write};
use std::net::{TcpListener, TcpStream};
use std::process::{Command, Stdio};
use std::thread::{self, JoinHandle};
use std::os::unix::process::ExitStatusExt as _;
use std::path::PathBuf;
use std::process::Stdio;

use anyhow::{anyhow, Result};
use log::debug;
use utils::{launch::Launch, stdio::make_stdout_stderr};
use anyhow::{anyhow, Context, Result};
use log::{debug, error};
use tokio::io::{AsyncBufReadExt as _, AsyncWriteExt as _, BufStream};
use tokio::net::{TcpListener, TcpStream};
use tokio::process::{Child, Command};
use tokio::sync::watch;
use tokio::task::JoinSet;
use tokio_stream::wrappers::TcpListenerStream;
use tokio_stream::StreamExt as _;
use utils::launch::Launch;
use utils::stdio::make_stdout_stderr;

pub fn start_server(listener: TcpListener) -> JoinHandle<()> {
thread::spawn(move || {
if let Err(err) = work(listener) {
debug!(err:?; "server thread is terminating")
}
})
/// Snapshot of the server's activity, published to observers through a
/// `tokio::sync::watch` channel.
///
/// The `krun-server` binary considers the server finished once
/// `connection_idle` is `true` and `child_processes` is `0`.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct State {
/// `true` while the server is not currently handling an accepted incoming
/// connection; set to `false` as soon as the listener yields a connection
/// (or an accept error) and back to `true` once it has been dealt with.
pub connection_idle: bool,
/// Number of child-process wait tasks the server is still tracking
/// (the length of its `JoinSet` at the time of the last update).
pub child_processes: usize,
}

fn work(listener: TcpListener) -> Result<()> {
for stream in listener.incoming() {
let stream = stream?;
pub async fn start_server(listener: TcpListener, state_tx: watch::Sender<State>) {
let mut listener_stream = TcpListenerStream::new(listener);
let mut child_set = JoinSet::new();

loop {
tokio::select! {
Some(stream) = listener_stream.next() => {
state_tx.send_if_modified(|state| {
let connection_idle = false;
if state.connection_idle == connection_idle {
return false;
}
state.connection_idle = connection_idle;
true
});
let stream = match stream {
Ok(stream) => stream,
Err(err) => {
eprintln!("Failed to accept incoming connection: {err}");
state_tx.send_if_modified(|state| {
let connection_idle = true;
if state.connection_idle == connection_idle {
return false;
}
state.connection_idle = connection_idle;
true
});
continue;
},
};
let stream = BufStream::new(stream);

if let Err(err) = handle_connection(stream) {
println!("Error processing client request: {err:?}");
match handle_connection(stream).await {
Ok((command, mut child)) => {
child_set.spawn(async move { (command, child.wait().await) });
state_tx.send_if_modified(|state| {
let child_processes = child_set.len();
if state.child_processes == child_processes {
return false;
}
state.child_processes = child_processes;
true
});
},
Err(err) => {
eprintln!("Failed to process client request: {err:?}");
},
}
state_tx.send_if_modified(|state| {
let connection_idle = true;
if state.connection_idle == connection_idle {
return false;
}
state.connection_idle = connection_idle;
true
});
},
Some(res) = child_set.join_next() => {
match res {
Ok((command, res)) => match res {
Ok(status) => {
debug!(command:?; "child process exited");
if !status.success() {
if let Some(code) = status.code() {
eprintln!(
"{command:?} process exited with status code: {code}"
);
} else {
eprintln!(
"{command:?} process terminated by signal: {}",
status
.signal()
.expect(
"either one of status code or signal should be set"
)
);
}
}
},
Err(err) => {
eprintln!("Failed to wait for {command:?} process to exit: {err}");
},
},
Err(err) => {
error!(err:% = err; "child task failed");
},
}
state_tx.send_if_modified(|state| {
let child_processes = child_set.len();
if state.child_processes == child_processes {
return false;
}
state.child_processes = child_processes;
true
});
},
}
}

Ok(())
}

fn read_request(mut stream: &TcpStream) -> Result<Launch> {
let mut buf_reader = BufReader::new(&mut stream);
async fn read_request(stream: &mut BufStream<TcpStream>) -> Result<Launch> {
let mut buf = String::new();
loop {
if buf_reader.read_line(&mut buf)? == 0 {
if stream.read_line(&mut buf).await? == 0 {
return Err(anyhow!("unexpected EOF"));
}
if buf.contains("EOM") {
Expand All @@ -43,32 +135,34 @@ fn read_request(mut stream: &TcpStream) -> Result<Launch> {
}
}

fn handle_connection(mut stream: TcpStream) -> Result<()> {
async fn handle_connection(mut stream: BufStream<TcpStream>) -> Result<(PathBuf, Child)> {
let mut envs: HashMap<String, String> = env::vars().collect();

let Launch {
command,
command_args,
env,
} = read_request(&stream)?;
} = read_request(&mut stream).await?;
debug!(command:?, command_args:?, env:?; "received launch request");
envs.extend(env);

let (stdout, stderr) = make_stdout_stderr(&command, &envs)?;

let err = Command::new(&command)
let res = Command::new(&command)
.args(command_args)
.envs(envs)
.stdin(Stdio::null())
.stdout(stdout)
.stderr(stderr)
.spawn();
if let Err(err) = err {
let msg = format!("Failed to execute command {command:?}: {err}");
stream.write_all(msg.as_bytes()).ok();
.spawn()
.with_context(|| format!("Failed to execute {command:?} as child process"));
if let Err(err) = &res {
let msg = format!("{err:?}");
teohhanhui marked this conversation as resolved.
Show resolved Hide resolved
stream.write_all(msg.as_bytes()).await.ok();
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

write_all is not cancellation safe. Expect Heisenbugs!

} else {
stream.write_all(b"OK").ok();
stream.write_all(b"OK").await.ok();
}
stream.flush().ok();
stream.flush().await.ok();

Ok(())
res.map(|child| (command, child))
}
2 changes: 1 addition & 1 deletion rustfmt.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
edition = "2021"

# empty_item_single_line = false
# empty_item_single_line = true
# error_on_line_overflow = true
# format_code_in_doc_comments = true
# format_strings = true
Expand Down