Skip to content

Commit

Permalink
feat: add txt2json
Browse files Browse the repository at this point in the history
  • Loading branch information
4o3F committed Jan 17, 2025
1 parent 662232a commit 7b78909
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 6 deletions.
36 changes: 31 additions & 5 deletions src/common/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use opencv::{
use parking_lot::RwLock;
use rayon::prelude::*;
use rayon_progress::ProgressAdaptor;
use serde::Serialize;
use std::{
collections::HashMap,
fs,
Expand Down Expand Up @@ -254,11 +255,7 @@ pub async fn generate_dataset_txt(dataset_path: &String, train_ratio: &f32) {
let valid_data = data[train_count as usize..].to_vec();

let dataset_path = dataset_path.to_str().unwrap();
fs::write(
format!("{}/../val.txt", dataset_path),
valid_data.concat(),
)
.unwrap();
fs::write(format!("{}/../val.txt", dataset_path), valid_data.concat()).unwrap();
fs::write(
format!("{}/../train.txt", dataset_path),
train_data.concat(),
Expand All @@ -271,6 +268,35 @@ pub async fn generate_dataset_txt(dataset_path: &String, train_ratio: &f32) {
tracing::info!("Dataset split done");
}

/// Converts a newline-separated list of image paths into a JSON dataset list.
///
/// Every non-blank line of the file at `txt_path` is taken verbatim as an image path.
/// The matching label path is derived from it by replacing each `/images/` with
/// `/labels/` and each `.tif` with `.png`. The entries are serialized as a JSON array
/// of `{"image": ..., "label": ...}` objects and written next to the input file, under
/// the same file name with its extension swapped for `.json`.
///
/// # Panics
///
/// Panics if the input file cannot be read, if serialization fails, or if the output
/// file cannot be written.
pub fn txt2json(txt_path: &String) {
    #[derive(Serialize)]
    struct DatasetItem {
        image: String,
        label: String,
    }

    // `with_extension` keeps the parent directory untouched. Building the path by hand as
    // "{parent}/{stem}.json" turned a bare file name such as "train.txt" (whose parent is
    // the empty path) into "/train.json", i.e. a file in the filesystem root.
    let save_path = PathBuf::from(txt_path).with_extension("json");

    let content = fs::read_to_string(txt_path)
        .unwrap_or_else(|err| panic!("Failed to read {}: {}", txt_path, err));

    let items: Vec<DatasetItem> = content
        .lines()
        // Blank lines carry no path; skip them instead of emitting empty entries.
        .filter(|line| !line.trim().is_empty())
        .map(|line| DatasetItem {
            image: line.to_string(),
            label: line.replace("/images/", "/labels/").replace(".tif", ".png"),
        })
        .collect();

    let json = serde_json::to_string(&items)
        .unwrap_or_else(|err| panic!("Failed to serialize dataset list: {}", err));

    fs::write(&save_path, json)
        .unwrap_or_else(|err| panic!("Failed to write {}: {}", save_path.display(), err));

    tracing::info!("Saved to {}", save_path.display());
}

// TODO: rewrite this to make it suitable for all datasets
pub async fn split_dataset(dataset_path: &String, train_ratio: &f32) {
// Check dataset_path contain images and labels folder
Expand Down
11 changes: 10 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ enum CommonCommands {
train_ratio: f32,
},

/// Split dataset into train and test sets and save file names to txt file
/// Split dataset into train and test sets and save file names to txt file, for yolo dataset
#[command(name = "generate-dataset-txt")]
GenerateDatasetTXT {
#[arg(
Expand All @@ -297,6 +297,12 @@ enum CommonCommands {
train_ratio: f32,
},

#[command(name = "txt2json")]
TXT2JSON {
#[arg(short, long, help = "TXT file path")]
txt_path: String,
},

/// Combine multiple JSON format dataset list compatible with huggingface dataset library
#[command(name = "combine-dataset-json")]
CombineDatasetJSON {
Expand Down Expand Up @@ -604,6 +610,9 @@ async fn main() {
} => {
common::dataset::generate_dataset_json(dataset_path, train_ratio);
}
CommonCommands::TXT2JSON { txt_path } => {
common::dataset::txt2json(txt_path);
}
CommonCommands::CombineDatasetJSON {
dataset_path,
save_path,
Expand Down

0 comments on commit 7b78909

Please sign in to comment.