Skip to content

Commit

Permalink
Merge pull request #1 from 0LNetworkCommunity/json-rescue-mode
Browse files Browse the repository at this point in the history
V5 json file parsing from rescue mode
  • Loading branch information
0o-de-lally authored Dec 4, 2024
2 parents 5f58786 + 092594f commit 76d336c
Show file tree
Hide file tree
Showing 27 changed files with 30,402 additions and 82 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ on:
- cron: "30 00 * * *"

jobs:
types:
all-tests:
timeout-minutes: 60
runs-on: ubuntu-latest
steps:
Expand Down
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ version = "0.0.1"

[dependencies]
anyhow = "^1.0"
bcs = { git = "https://github.com/aptos-labs/bcs.git", rev = "d31fab9d81748e2594be5cd5cdf845786a30562d" }
chrono = { version = "0.4.19", features = ["clock", "serde"] }
clap = { version = "4.3.5", features = ["derive", "unstable-styles"] }
diem-temppath = { git = "https://github.com/0LNetworkCommunity/diem.git", branch = "release" }
Expand All @@ -29,6 +30,9 @@ once_cell = "^1.2"
serde = { version = "^1.0", features = ["derive", "rc"] }
serde_json = { version = "^1", features = ["preserve_order"] }
tokio = { version = "1", features = ["full"] }
hex = "0.4.3"
tar = "0.4.43"
smooth-json = "0.2.7"

[dev-dependencies]
sqlx = { version = "0.8", features = [ "runtime-tokio", "tls-native-tls", "sqlite", "migrate", "macros", "derive", "postgres"] }
Expand Down
39 changes: 21 additions & 18 deletions src/cypher_templates.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! organic free trade template literals for cypher queries
use anyhow::Result;
use anyhow::{Context, Result};

// TODO move this to a .CQL file so we can lint and debug
pub fn write_batch_tx_string(list_str: &str) -> String {
Expand All @@ -16,12 +16,16 @@ MERGE (from)-[rel:Tx {{tx_hash: tx.tx_hash}}]->(to)
ON CREATE SET rel.created_at = timestamp(), rel.modified_at = null
ON MATCH SET rel.modified_at = timestamp()
SET
rel += tx.args,
rel.block_datetime = tx.block_datetime,
rel.block_timestamp = tx.block_timestamp,
rel.relation = tx.relation,
rel.function = tx.function
// Conditionally add `tx.args` if it exists
FOREACH (_ IN CASE WHEN tx.args IS NOT NULL THEN [1] ELSE [] END |
SET rel += tx.args
)
WITH rel
RETURN
Expand Down Expand Up @@ -59,7 +63,7 @@ RETURN
)
}

use anyhow::bail;
use log::warn;
use serde::Serialize;
use serde_json::Value;

Expand All @@ -72,23 +76,24 @@ use serde_json::Value;
/// # Returns
/// A string in the format `{key: value, nested: {key2: value2}, array: [value3, value4]}` that can be used in Cypher queries.
/// Thanks Copilot ;)
pub fn to_cypher_object<T: Serialize>(object: &T, prefix: Option<&str>) -> Result<String> {
pub fn to_cypher_object<T: Serialize>(object: &T) -> Result<String> {
// Serialize the struct to a JSON value

let serialized_value = serde_json::to_value(object).expect("Failed to serialize");
// dbg!(&serialized_value);

// Convert the JSON value into a map for easy processing
let map = if let Value::Object(obj) = serialized_value {
obj
} else {
bail!("Expected the serialized value to be an object");
let flattener = smooth_json::Flattener {
separator: "_",
..Default::default()
};

// Convert the JSON value into a map for easy processing
let flat = flattener.flatten(&serialized_value);
let map = flat.as_object().context("cannot map on json object")?;
// Build properties part of the Cypher object
let properties: Vec<String> = map
.into_iter()
.map(|(mut key, value)| {
.map(|(key, value)| {
let formatted_value = match value {
Value::String(s) => format!("'{}'", s), // Wrap strings in single quotes
Value::Number(n) => n.to_string(), // Use numbers as-is
Expand All @@ -104,19 +109,17 @@ pub fn to_cypher_object<T: Serialize>(object: &T, prefix: Option<&str>) -> Resul
Value::Bool(b) => b.to_string(),
Value::Null => "null".to_string(),
Value::Object(_) => {
to_cypher_object(elem, None).unwrap_or("error".to_owned())
to_cypher_object(elem).unwrap_or("error".to_owned())
} // Recurse for nested objects in arrays
_ => panic!("Unsupported type in array for Cypher serialization"),
_ => "Unsupported type in array for Cypher serialization".to_string(),
})
.collect();
format!("[{}]", elements.join(", "))
}
Value::Object(_) => {
if let Some(p) = prefix {
key = format!("{}.{}", p, key);
}
to_cypher_object(&value, Some(&key)).unwrap_or("error".to_owned())
} // Recurse for nested objects
warn!("the json should have been flattened before this");
"recursive object error".to_string()
}
};
format!("{}: {}", key, formatted_value)
})
Expand Down Expand Up @@ -165,6 +168,6 @@ fn test_serialize_to_cypher_object() {
};

// Serialize to a Cypher object
let cypher_object = to_cypher_object(&person, None).unwrap();
let cypher_object = to_cypher_object(&person).unwrap();
println!("{}", cypher_object);
}
1 change: 0 additions & 1 deletion src/extract_transactions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ pub fn make_master_tx(
tx_hash,
expiration_timestamp: user_tx.expiration_timestamp_secs(),
sender: user_tx.sender(),
recipient: relation_label.get_recipient(),
epoch,
round,
block_timestamp,
Expand Down
227 changes: 227 additions & 0 deletions src/json_rescue_v5_extract.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
use crate::{
schema_transaction::{EntryFunctionArgs, RelationLabel, WarehouseEvent, WarehouseTxMaster},
unzip_temp::decompress_tar_archive,
};
use diem_crypto::HashValue;
use libra_backwards_compatibility::{
sdk::{
v5_0_0_genesis_transaction_script_builder::ScriptFunctionCall as ScriptFunctionCallGenesis,
v5_2_0_transaction_script_builder::ScriptFunctionCall as ScriptFunctionCallV520,
},
version_five::{
legacy_address_v5::LegacyAddressV5,
transaction_type_v5::{TransactionPayload, TransactionV5},
transaction_view_v5::{ScriptView, TransactionDataView, TransactionViewV5},
},
};

use anyhow::{anyhow, Context, Result};
use diem_temppath::TempPath;
use diem_types::account_address::AccountAddress;
use log::trace;
use std::path::{Path, PathBuf};
/// The canonical transaction archives for V5 were kept in a different format as in v6 and v7.
/// As of Nov 2024, there's a project to recover the V5 transaction archives to be in the same bytecode flat file format as v6 and v7.
/// Until then, we must parse the json files.
///
/// Parses one rescue-mode JSON file (an array of `TransactionViewV5`) and maps
/// each user transaction into a `WarehouseTxMaster`.
///
/// Returns the collected transactions plus a list of events — the event list is
/// currently always empty (event extraction is still TODO below).
pub fn extract_v5_json_rescue(
    one_json_file: &Path,
) -> Result<(Vec<WarehouseTxMaster>, Vec<WarehouseEvent>)> {
    let json = std::fs::read_to_string(one_json_file).context("could not read file")?;

    let txs: Vec<TransactionViewV5> = serde_json::from_str(&json)
        .map_err(|e| anyhow!("could not parse JSON to TransactionViewV5, {:?}", e))?;

    let mut tx_vec = vec![];
    // Never populated yet; kept so the return shape is stable once events land.
    let event_vec = vec![];

    for t in txs {
        let mut wtxs = WarehouseTxMaster::default();
        match &t.transaction {
            TransactionDataView::UserTransaction { sender, script, .. } => {
                // V5 used a legacy (shorter) address format; widen it.
                wtxs.sender = cast_legacy_account(sender)?;

                // must cast from V5 Hashvalue buffer layout
                wtxs.tx_hash = HashValue::from_slice(&t.hash.to_vec())?;

                wtxs.function = make_function_name(script);
                trace!("function: {}", &wtxs.function);

                // Classify the tx (relation label, entry-function args) from
                // the raw BCS bytes carried alongside the JSON view.
                decode_transaction_args(&mut wtxs, &t.bytes)?;

                // TODO:
                // wtxs.events
                // wtxs.block_timestamp

                tx_vec.push(wtxs);
            }
            TransactionDataView::BlockMetadata { timestamp_usecs: _ } => {
                // TODO get epoch events
                // t.events.iter().any(|e|{
                //     if let epoch: NewEpoch = e.data {
                //     }
                // })
            }
            _ => {}
        }
    }

    Ok((tx_vec, event_vec))
}

/// Decodes the BCS-encoded transaction bytes and classifies the transaction,
/// setting `wtx.relation_label` and, for selected cases, `wtx.entry_function`.
///
/// Tries the v5.0.0 (genesis) script-function decoder first, then the v5.2.0
/// decoder; when both decode, the v5.2.0 pass overwrites the labels set by
/// the first pass (preserved behavior of the original two-pass structure).
///
/// # Errors
/// Fails if the bytes do not BCS-decode to a `TransactionV5`, or if a legacy
/// address embedded in the script arguments cannot be converted.
pub fn decode_transaction_args(wtx: &mut WarehouseTxMaster, tx_bytes: &[u8]) -> Result<()> {
    // Propagate malformed archive bytes instead of panicking: these files are
    // external input and a single bad record should not abort the whole run.
    let t: TransactionV5 =
        bcs::from_bytes(tx_bytes).context("could not bcs decode bytes to TransactionV5")?;

    if let TransactionV5::UserTransaction(u) = &t {
        if let TransactionPayload::ScriptFunction(_) = &u.raw_txn.payload {
            if let Some(sf) = &ScriptFunctionCallGenesis::decode(&u.raw_txn.payload) {
                // TODO: some script functions have very large payloads which clog the e.g. Miner. So those are only added for the catch-all txs which don't fall into categories we are interested in.
                match sf {
                    ScriptFunctionCallGenesis::BalanceTransfer { destination, .. } => {
                        wtx.relation_label =
                            RelationLabel::Transfer(cast_legacy_account(destination)?);

                        wtx.entry_function = Some(EntryFunctionArgs::V5(sf.to_owned()));
                    }
                    ScriptFunctionCallGenesis::CreateAccUser { .. } => {
                        // onboards self
                        wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
                    }
                    ScriptFunctionCallGenesis::CreateAccVal { .. } => {
                        // onboards self
                        wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
                    }

                    ScriptFunctionCallGenesis::CreateUserByCoinTx { account, .. } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(account)?);
                    }
                    ScriptFunctionCallGenesis::CreateValidatorAccount {
                        sliding_nonce: _,
                        new_account_address,
                        ..
                    } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(new_account_address)?);
                    }
                    ScriptFunctionCallGenesis::CreateValidatorOperatorAccount {
                        sliding_nonce: _,
                        new_account_address,
                        ..
                    } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(new_account_address)?);
                    }

                    ScriptFunctionCallGenesis::MinerstateCommit { .. } => {
                        wtx.relation_label = RelationLabel::Miner;
                    }
                    ScriptFunctionCallGenesis::MinerstateCommitByOperator { .. } => {
                        wtx.relation_label = RelationLabel::Miner;
                    }
                    _ => {
                        // Catch-all: keep the full args only for txs we don't
                        // otherwise categorize (see TODO above about payload size).
                        wtx.relation_label = RelationLabel::Configuration;

                        wtx.entry_function = Some(EntryFunctionArgs::V5(sf.to_owned()));
                    }
                }
            }

            if let Some(sf) = &ScriptFunctionCallV520::decode(&u.raw_txn.payload) {
                match sf {
                    ScriptFunctionCallV520::CreateAccUser { .. } => {
                        // onboards self
                        wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
                    }
                    ScriptFunctionCallV520::CreateAccVal { .. } => {
                        // onboards self
                        wtx.relation_label = RelationLabel::Onboarding(wtx.sender);
                    }

                    ScriptFunctionCallV520::CreateValidatorAccount {
                        sliding_nonce: _,
                        new_account_address,
                        ..
                    } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(new_account_address)?);
                    }
                    ScriptFunctionCallV520::CreateValidatorOperatorAccount {
                        sliding_nonce: _,
                        new_account_address,
                        ..
                    } => {
                        wtx.relation_label =
                            RelationLabel::Onboarding(cast_legacy_account(new_account_address)?);
                    }
                    ScriptFunctionCallV520::MinerstateCommit { .. } => {
                        wtx.relation_label = RelationLabel::Miner;
                    }
                    ScriptFunctionCallV520::MinerstateCommitByOperator { .. } => {
                        wtx.relation_label = RelationLabel::Miner;
                    }
                    _ => {
                        wtx.relation_label = RelationLabel::Configuration;
                        wtx.entry_function = Some(EntryFunctionArgs::V520(sf.to_owned()));
                    }
                }
            }
        }
    }
    Ok(())
}

/// Unpacks a .tgz archive into a freshly created temporary directory.
///
/// NOTE: the `TempPath` handle is handed back to the caller, who owns its
/// lifetime — every extracted file is removed when the handle is dropped,
/// so keep it alive for as long as the contents are needed.
pub fn decompress_to_temppath(tgz_file: &Path) -> Result<TempPath> {
    let unpack_dir = TempPath::new();
    unpack_dir.create_as_dir()?;

    decompress_tar_archive(tgz_file, unpack_dir.path())?;

    Ok(unpack_dir)
}

/// Recursively lists all `.json` files under `search_dir`
/// (typically the files decompressed from a tgz archive).
///
/// # Errors
/// Fails if the directory cannot be canonicalized, its path is not valid
/// UTF-8, or any matched glob entry cannot be read.
pub fn list_all_json_files(search_dir: &Path) -> Result<Vec<PathBuf>> {
    let path = search_dir.canonicalize()?;

    let pattern = format!(
        "{}/**/*.json",
        path.to_str().context("cannot parse starting dir")?
    );

    // Propagate unreadable entries as an error instead of panicking mid-scan.
    let vec_pathbuf = glob::glob(&pattern)?
        .collect::<Result<Vec<_>, _>>()
        .context("could not read glob entry")?;
    Ok(vec_pathbuf)
}

/// Recursively lists all `.tgz` archives under `search_dir`.
/// (The original comment said "json files" — this function globs for `*.tgz`.)
///
/// # Errors
/// Fails if the directory cannot be canonicalized, its path is not valid
/// UTF-8, or any matched glob entry cannot be read.
pub fn list_all_tgz_archives(search_dir: &Path) -> Result<Vec<PathBuf>> {
    let path = search_dir.canonicalize()?;

    let pattern = format!(
        "{}/**/*.tgz",
        path.to_str().context("cannot parse starting dir")?
    );

    // Propagate unreadable entries as an error instead of panicking mid-scan.
    let vec_pathbuf = glob::glob(&pattern)?
        .collect::<Result<Vec<_>, _>>()
        .context("could not read glob entry")?;
    Ok(vec_pathbuf)
}

/// Formats a V5 script view as a `0x::<module>::<function>` identifier,
/// substituting "none" for a missing module or function name.
fn make_function_name(script: &ScriptView) -> String {
    // as_deref borrows the inner String as &str, avoiding the fallback
    // `String` allocation the previous `unwrap_or(&"none".to_string())` paid
    // on every call.
    let module = script.module_name.as_deref().unwrap_or("none");
    let function = script.function_name.as_deref().unwrap_or("none");

    format!("0x::{}::{}", module, function)
}

/// Converts a V5 legacy address into the current `AccountAddress` format by
/// round-tripping through its hex-literal representation.
fn cast_legacy_account(legacy: &LegacyAddressV5) -> Result<AccountAddress> {
    let hex_literal = legacy.to_hex_literal();
    let address = AccountAddress::from_hex_literal(&hex_literal)?;
    Ok(address)
}
Loading

0 comments on commit 76d336c

Please sign in to comment.