untar in parallel (#18184)

* untar in parallel

* make enum for 'ignore' return value
This commit is contained in:
Jeff Washington (jwash)
2021-06-24 17:29:49 -05:00
committed by GitHub
parent 50deba1f10
commit 31ec986ea1
2 changed files with 76 additions and 27 deletions

View File

@@ -84,6 +84,12 @@ fn check_unpack_result(unpack_result: bool, path: String) -> Result<()> {
Ok(())
}
pub enum UnpackPath<'a> {
Valid(&'a Path),
Ignore,
Invalid,
}
fn unpack_archive<'a, A: Read, C>(
archive: &mut Archive<A>,
apparent_limit_size: u64,
@@ -92,7 +98,7 @@ fn unpack_archive<'a, A: Read, C>(
mut entry_checker: C,
) -> Result<()>
where
C: FnMut(&[&str], tar::EntryType) -> Option<&'a Path>,
C: FnMut(&[&str], tar::EntryType) -> UnpackPath<'a>,
{
let mut apparent_total_size: u64 = 0;
let mut actual_total_size: u64 = 0;
@@ -130,14 +136,17 @@ where
let parts: Vec<_> = parts.map(|p| p.unwrap()).collect();
let unpack_dir = match entry_checker(parts.as_slice(), kind) {
None => {
UnpackPath::Invalid => {
return Err(UnpackError::Archive(format!(
"extra entry found: {:?} {:?}",
path_str,
entry.header().entry_type(),
)));
}
Some(unpack_dir) => unpack_dir,
UnpackPath::Ignore => {
continue;
}
UnpackPath::Valid(unpack_dir) => unpack_dir,
};
apparent_total_size = checked_total_size_sum(
@@ -193,13 +202,27 @@ where
/// Map from AppendVec file name to unpacked file system location
pub type UnpackedAppendVecMap = HashMap<String, PathBuf>;
// select/choose only 'index' out of each # of 'divisions' of total items.
pub struct ParallelSelector {
pub index: usize,
pub divisions: usize,
}
impl ParallelSelector {
pub fn select_index(&self, index: usize) -> bool {
index % self.divisions == self.index
}
}
pub fn unpack_snapshot<A: Read>(
archive: &mut Archive<A>,
ledger_dir: &Path,
account_paths: &[PathBuf],
parallel_selector: Option<ParallelSelector>,
) -> Result<UnpackedAppendVecMap> {
assert!(!account_paths.is_empty());
let mut unpacked_append_vec_map = UnpackedAppendVecMap::new();
let mut i = 0;
unpack_archive(
archive,
@@ -208,19 +231,31 @@ pub fn unpack_snapshot<A: Read>(
MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT,
|parts, kind| {
if is_valid_snapshot_archive_entry(parts, kind) {
i += 1;
match &parallel_selector {
Some(parallel_selector) => {
if !parallel_selector.select_index(i - 1) {
return UnpackPath::Ignore;
}
}
None => {}
};
if let ["accounts", file] = parts {
// Randomly distribute the accounts files about the available `account_paths`,
let path_index = thread_rng().gen_range(0, account_paths.len());
account_paths.get(path_index).map(|path_buf| {
match account_paths.get(path_index).map(|path_buf| {
unpacked_append_vec_map
.insert(file.to_string(), path_buf.join("accounts").join(file));
path_buf.as_path()
})
}) {
Some(path) => UnpackPath::Valid(path),
None => UnpackPath::Invalid,
}
} else {
Some(ledger_dir)
UnpackPath::Valid(ledger_dir)
}
} else {
None
UnpackPath::Invalid
}
},
)
@@ -337,9 +372,9 @@ fn unpack_genesis<A: Read>(
MAX_GENESIS_ARCHIVE_UNPACKED_COUNT,
|p, k| {
if is_valid_genesis_archive_entry(p, k) {
Some(unpack_dir)
UnpackPath::Valid(unpack_dir)
} else {
None
UnpackPath::Invalid
}
},
)
@@ -530,7 +565,7 @@ mod tests {
fn finalize_and_unpack_snapshot(archive: tar::Builder<Vec<u8>>) -> Result<()> {
with_finalize_and_unpack(archive, |a, b| {
unpack_snapshot(a, b, &[PathBuf::new()]).map(|_| ())
unpack_snapshot(a, b, &[PathBuf::new()], None).map(|_| ())
})
}