Skip to content

Commit

Permalink
feat: add split_images_with_label_filter
Browse files Browse the repository at this point in the history
4o3F committed Jan 15, 2025
1 parent d611adf commit 747c89e
Showing 2 changed files with 238 additions and 10 deletions.
205 changes: 199 additions & 6 deletions src/common/augment.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::{
collections::HashSet,
fs::{self},
path::PathBuf,
sync::{Arc, RwLock},
@@ -624,7 +625,7 @@ pub async fn filter_dataset_with_rgblist(
tracing::info!("Images filter done");
}

pub async fn split_images_with_filter(
pub async fn split_images_with_rgb_filter(
images_path: &String,
target_height: &u32,
target_width: &u32,
@@ -717,12 +718,12 @@ pub async fn split_images_with_filter(
let label_output_path = label_output_path.clone();
threads.spawn(async move {
let _ = permit.acquire().await.unwrap();
tracing::info!("Processing {}", entry.file_name().unwrap().to_str().unwrap());
tracing::info!(
"Processing {}",
entry.file_name().unwrap().to_str().unwrap()
);
let label_id = entry.file_stem().unwrap().to_str().unwrap().to_string();
let img = imgcodecs::imread(entry.to_str().unwrap(), imgcodecs::IMREAD_COLOR)
.unwrap();


let img = imgcodecs::imread(entry.to_str().unwrap(), imgcodecs::IMREAD_COLOR).unwrap();

let size = img.size().unwrap();
let (width, height) = (size.width, size.height);
@@ -823,6 +824,198 @@ pub async fn split_images_with_filter(
while threads.join_next().await.is_some() {}
}

pub async fn split_images_with_label_filter(
images_path: &String,
labels_path: &String,
target_height: &u32,
target_width: &u32,
) {
let valid_name_set: Arc<RwLock<HashSet<String>>> = Arc::new(RwLock::new(HashSet::new()));

let mut valid_name_set_writer = valid_name_set.write().unwrap();
fs::read_dir(labels_path)
.unwrap()
.map(|e| e.unwrap().path())
.for_each(|e| {
let name = e.file_stem().unwrap().to_str().unwrap().to_string();
valid_name_set_writer.insert(name);
});
drop(valid_name_set_writer);

let image_entries: Vec<PathBuf>;

let images_output_path: String;

let images_path = PathBuf::from(images_path.as_str());

if images_path.is_dir() {
image_entries = fs::read_dir(&images_path)
.unwrap()
.map(|e| e.unwrap().path())
.collect();

images_output_path = format!("{}/output/", images_path.to_str().unwrap());
} else {
image_entries = vec![images_path.clone()];
images_output_path = format!(
"{}/output/labels/",
images_path.parent().unwrap().to_str().unwrap()
);
}

let sem = Arc::new(Semaphore::new(
(*THREAD_POOL.read().expect_or_log("Get pool error")).into(),
));
tracing::info!("sem available permits: {}", sem.available_permits());
let mut threads = tokio::task::JoinSet::new();

match fs::create_dir_all(&images_output_path) {
Ok(_) => {
tracing::info!("Image output directory created");
}
Err(e) => {
if e.kind() == std::io::ErrorKind::AlreadyExists {
tracing::info!("Image output directory already exists");
} else {
tracing::error!("Failed to create directory: {}", e);
return ();
}
}
}

// Image Processing
let mut image_extension = None;
for entry in image_entries {
if !entry.is_file() {
continue;
}

if image_extension.is_none() {
let extension = entry
.extension()
.unwrap()
.to_os_string()
.into_string()
.unwrap();
image_extension = Some(extension);
}

let permit = Arc::clone(&sem);
let target_width = *target_width;
let target_height = *target_height;
let valid_name_set = Arc::clone(&valid_name_set);
let image_extension = image_extension.clone();

let images_output_path = images_output_path.clone();
threads.spawn(async move {
let _ = permit.acquire().await.unwrap();
tracing::info!(
"Processing {}",
entry.file_name().unwrap().to_str().unwrap()
);
let image_id = entry.file_stem().unwrap().to_str().unwrap().to_string();
let img =
imgcodecs::imread(entry.to_str().unwrap(), imgcodecs::IMREAD_UNCHANGED).unwrap();

let size = img.size().unwrap();
let (width, height) = (size.width, size.height);
let y_count = height / target_height as i32;
let x_count = width / target_width as i32;
// let mut labels_map = HashMap::<String, Mat>::new();

// Crop horizontally from left
let row_iter = ProgressAdaptor::new(0..y_count);
let row_progress = row_iter.items_processed();
row_iter.for_each(|row_index| {
for col_index in 0..x_count {
let image_id = format!("{}_LTR_x{}_y{}", image_id, col_index, row_index);
if !valid_name_set.read().unwrap().contains(&image_id) {
continue;
}
let cropped = core::Mat::roi(
&img,
core::Rect::new(
col_index * target_width as i32,
row_index * target_height as i32,
target_width as i32,
target_height as i32,
),
)
.unwrap();

imgcodecs::imwrite(
&format!(
"{}/{}.{}",
images_output_path,
image_id,
image_extension.as_ref().unwrap()
),
&cropped,
&core::Vector::new(),
)
.unwrap();
}
if row_progress.get() != 0 && row_progress.get() % 10 == 0 {
tracing::info!(
"Image {} LTR Row {} / {} done",
image_id,
row_progress.get(),
y_count
);
}
});

tracing::info!("Label {} LTR iteration done", image_id);

// Crop horizontally from right
let row_iter = ProgressAdaptor::new(0..y_count);
let row_progress = row_iter.items_processed();
row_iter.for_each(|row_index| {
for col_index in 0..x_count {
let image_id = format!("{}_RTL_x{}_y{}", image_id, col_index, row_index);
if !valid_name_set.read().unwrap().contains(&image_id) {
continue;
}
let cropped = core::Mat::roi(
&img,
core::Rect::new(
width - (col_index + 1) * target_width as i32,
height - (row_index + 1) * target_height as i32,
target_width as i32,
target_height as i32,
),
)
.unwrap();
imgcodecs::imwrite(
&format!(
"{}/{}.{}",
images_output_path,
image_id,
image_extension.as_ref().unwrap()
),
&cropped,
&core::Vector::new(),
)
.unwrap();
}
if row_progress.get() != 0 && row_progress.get() % 10 == 0 {
tracing::info!(
"Image {} RTL Row {} / {} done",
image_id,
row_progress.get(),
y_count
);
}
});
tracing::info!("Image {} RTL iteration done", image_id);

tracing::info!("Image {} process done", image_id);
});
}

while threads.join_next().await.is_some() {}
}

pub async fn stich_images(splited_images: &String, target_height: &i32, target_width: &i32) {
let entries = fs::read_dir(splited_images).unwrap();
let mut size: Option<(i32, i32)> = None;
43 changes: 39 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -119,8 +119,9 @@ enum CommonCommands {
target_width: u32,
},

/// Split large images to small pieces with a filter for enough valid pixels
SplitImagesWithFilter {
/// Split label images to small pieces with a filter for enough valid pixels
#[command(name = "split-images-with-rgb-filter")]
SplitImagesWithRGBFilter {
#[arg(short, long, help = "The path for the folder containing images")]
images_path: String,

@@ -137,6 +138,26 @@ enum CommonCommands {
valid_rgb_mode: bool,
},

/// Split images to small pieces with a filter for label name match
#[command(name = "split-images-with-label-filter")]
SplitImagesWithLabelFilter {
#[arg(short, long, help = "The path for the folder containing images")]
images_path: String,

#[arg(
short,
long,
help = "The path for the folder containing labels, should be same as images folder"
)]
labels_path: String,

#[arg(long = "height", help = "Height for each split")]
target_height: u32,

#[arg(long = "width", help = "Width for each split")]
target_width: u32,
},

/// Filter dataset with RGB list
#[command(name = "filter-dataset-with-rgblist")]
FilterDatasetWithRGBList {
@@ -496,14 +517,14 @@ async fn main() {
)
.await;
}
CommonCommands::SplitImagesWithFilter {
CommonCommands::SplitImagesWithRGBFilter {
images_path,
target_height,
target_width,
rgb_list,
valid_rgb_mode,
} => {
common::augment::split_images_with_filter(
common::augment::split_images_with_rgb_filter(
images_path,
target_height,
target_width,
@@ -512,6 +533,20 @@ async fn main() {
)
.await;
}
CommonCommands::SplitImagesWithLabelFilter {
images_path,
labels_path,
target_height,
target_width,
} => {
common::augment::split_images_with_label_filter(
images_path,
labels_path,
target_height,
target_width,
)
.await;
}
CommonCommands::FilterDatasetWithRGBList {
dataset_path,
rgb_list,

0 comments on commit 747c89e

Please sign in to comment.