Remove CUDA feature (#6094)
This commit is contained in:
@ -14,7 +14,6 @@ edition = "2018"
|
||||
codecov = { repository = "solana-labs/solana", branch = "master", service = "github" }
|
||||
|
||||
[features]
|
||||
cuda = []
|
||||
pin_gpu_memory = []
|
||||
|
||||
[dependencies]
|
||||
@ -27,6 +26,8 @@ core_affinity = "0.5.9"
|
||||
crc = { version = "1.8.1", optional = true }
|
||||
crossbeam-channel = "0.3"
|
||||
dir-diff = "0.3.1"
|
||||
dlopen = "0.1.8"
|
||||
dlopen_derive = "0.1.4"
|
||||
fs_extra = "1.1.0"
|
||||
indexmap = "1.1"
|
||||
itertools = "0.8.0"
|
||||
|
@ -1,50 +0,0 @@
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
|
||||
/// Build script entry point.
///
/// Always asks cargo to re-run the script when `build.rs` changes. When the
/// `cuda` cargo feature is enabled (signalled through `CARGO_FEATURE_CUDA`)
/// it additionally:
///
/// * refuses to build on anything but Linux,
/// * emits the `cuda` cfg flag,
/// * resolves `<workspace>/target/perf-libs/$SOLANA_PERF_LIBS_CUDA`, makes
///   sure that directory exists and registers it as a native link search
///   path and as a rebuild trigger.
fn main() {
    println!("cargo:rerun-if-changed=build.rs");

    // Nothing else to do unless the `cuda` feature was requested.
    if env::var("CARGO_FEATURE_CUDA").is_err() {
        return;
    }

    if cfg!(not(target_os = "linux")) {
        eprintln!("Error: CUDA feature is only available on Linux");
        exit(1);
    }
    println!("cargo:rustc-cfg=cuda");

    // The perf-libs live under the parent of this crate's manifest directory.
    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
    let workspace_root = Path::new(&manifest_dir).parent().unwrap();
    let cuda_subdir = match env::var("SOLANA_PERF_LIBS_CUDA") {
        Ok(subdir) => subdir,
        Err(err) => {
            eprintln!("Error: SOLANA_PERF_LIBS_CUDA not defined: {}", err);
            exit(1);
        }
    };
    let perf_libs_path = workspace_root
        .join(Path::new("target/perf-libs"))
        .join(cuda_subdir);
    let perf_libs_dir = perf_libs_path.to_str().unwrap();

    // Ensure `perf_libs_dir` exists. It's been observed that
    // a cargo:rerun-if-changed= directive with a non-existent
    // directory triggers a rebuild on every |cargo build| invocation
    if let Err(err) = fs::create_dir_all(&perf_libs_dir) {
        if err.kind() != std::io::ErrorKind::AlreadyExists {
            panic!("Unable to create {}: {:?}", perf_libs_dir, err);
        }
    }

    println!("cargo:rerun-if-changed={}", perf_libs_dir);
    println!("cargo:rustc-link-search=native={}", perf_libs_dir);

    // Also rebuild when the platform-specific shared library itself changes.
    if cfg!(windows) {
        println!("cargo:rerun-if-changed={}/libcuda-crypt.dll", perf_libs_dir);
    } else if cfg!(target_os = "macos") {
        println!(
            "cargo:rerun-if-changed={}/libcuda-crypt.dylib",
            perf_libs_dir
        );
    } else {
        println!("cargo:rerun-if-changed={}/libcuda-crypt.so", perf_libs_dir);
    }
}
|
@ -1,11 +1,8 @@
|
||||
// Module used by validators to approve storage mining proofs
|
||||
// // in parallel using the GPU
|
||||
// Module used by validators to approve storage mining proofs in parallel using the GPU
|
||||
|
||||
use crate::blocktree::Blocktree;
|
||||
use crate::chacha::{CHACHA_BLOCK_SIZE, CHACHA_KEY_SIZE};
|
||||
use crate::sigverify::{
|
||||
chacha_cbc_encrypt_many_sample, chacha_end_sha_state, chacha_init_sha_state,
|
||||
};
|
||||
use crate::perf_libs;
|
||||
use solana_sdk::hash::Hash;
|
||||
use std::io;
|
||||
use std::mem::size_of;
|
||||
@ -22,6 +19,7 @@ pub fn chacha_cbc_encrypt_file_many_keys(
|
||||
ivecs: &mut [u8],
|
||||
samples: &[u64],
|
||||
) -> io::Result<Vec<Hash>> {
|
||||
let api = perf_libs::api().expect("no perf libs");
|
||||
if ivecs.len() % CHACHA_BLOCK_SIZE != 0 {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
@ -45,7 +43,7 @@ pub fn chacha_cbc_encrypt_file_many_keys(
|
||||
let mut total_size = 0;
|
||||
let mut time: f32 = 0.0;
|
||||
unsafe {
|
||||
chacha_init_sha_state(int_sha_states.as_mut_ptr(), num_keys as u32);
|
||||
(api.chacha_init_sha_state)(int_sha_states.as_mut_ptr(), num_keys as u32);
|
||||
}
|
||||
loop {
|
||||
match blocktree.get_data_shreds(current_slot, start_index, std::u64::MAX, &mut buffer) {
|
||||
@ -73,7 +71,7 @@ pub fn chacha_cbc_encrypt_file_many_keys(
|
||||
}
|
||||
|
||||
unsafe {
|
||||
chacha_cbc_encrypt_many_sample(
|
||||
(api.chacha_cbc_encrypt_many_sample)(
|
||||
buffer[..size].as_ptr(),
|
||||
int_sha_states.as_mut_ptr(),
|
||||
size,
|
||||
@ -97,7 +95,7 @@ pub fn chacha_cbc_encrypt_file_many_keys(
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
chacha_end_sha_state(
|
||||
(api.chacha_end_sha_state)(
|
||||
int_sha_states.as_ptr(),
|
||||
sha_states.as_mut_ptr(),
|
||||
num_keys as u32,
|
||||
@ -114,22 +112,23 @@ pub fn chacha_cbc_encrypt_file_many_keys(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::blocktree::get_tmp_ledger_path;
|
||||
use crate::blocktree::Blocktree;
|
||||
use crate::chacha::chacha_cbc_encrypt_ledger;
|
||||
use crate::chacha_cuda::chacha_cbc_encrypt_file_many_keys;
|
||||
use crate::entry::create_ticks;
|
||||
use crate::replicator::sample_file;
|
||||
use solana_sdk::clock::DEFAULT_SLOTS_PER_SEGMENT;
|
||||
use solana_sdk::hash::Hash;
|
||||
use solana_sdk::signature::{Keypair, KeypairUtil};
|
||||
use std::fs::{remove_dir_all, remove_file};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[test]
|
||||
fn test_encrypt_file_many_keys_single() {
|
||||
solana_logger::setup();
|
||||
if perf_libs::api().is_none() {
|
||||
info!("perf-libs unavailable, skipped");
|
||||
return;
|
||||
}
|
||||
|
||||
let slots_per_segment = 32;
|
||||
let entries = create_ticks(slots_per_segment, Hash::default());
|
||||
@ -189,6 +188,10 @@ mod tests {
|
||||
#[test]
|
||||
fn test_encrypt_file_many_keys_multiple_keys() {
|
||||
solana_logger::setup();
|
||||
if perf_libs::api().is_none() {
|
||||
info!("perf-libs unavailable, skipped");
|
||||
return;
|
||||
}
|
||||
|
||||
let entries = create_ticks(32, Hash::default());
|
||||
let ledger_dir = "test_encrypt_file_many_keys_multiple";
|
||||
@ -255,6 +258,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_encrypt_file_many_keys_bad_key_length() {
|
||||
solana_logger::setup();
|
||||
if perf_libs::api().is_none() {
|
||||
info!("perf-libs unavailable, skipped");
|
||||
return;
|
||||
}
|
||||
|
||||
let mut keys = hex!("abc123");
|
||||
let ledger_dir = "test_encrypt_file_many_keys_bad_key_length";
|
||||
let ledger_path = get_tmp_ledger_path(ledger_dir);
|
||||
|
@ -5,48 +5,55 @@
|
||||
// copies from host memory to GPU memory unless the memory is page-pinned and
|
||||
// cannot be paged to disk. The cuda driver provides these interfaces to pin and unpin memory.
|
||||
|
||||
#[cfg(feature = "pin_gpu_memory")]
|
||||
use crate::perf_libs;
|
||||
use crate::recycler::Reset;
|
||||
|
||||
#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
|
||||
use crate::sigverify::{cuda_host_register, cuda_host_unregister};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
|
||||
#[cfg(feature = "pin_gpu_memory")]
|
||||
use std::os::raw::c_int;
|
||||
|
||||
#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
|
||||
#[cfg(feature = "pin_gpu_memory")]
|
||||
const CUDA_SUCCESS: c_int = 0;
|
||||
|
||||
pub fn pin<T>(_mem: &mut Vec<T>) {
|
||||
#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
|
||||
unsafe {
|
||||
use core::ffi::c_void;
|
||||
use std::mem::size_of;
|
||||
#[cfg(feature = "pin_gpu_memory")]
|
||||
{
|
||||
if let Some(api) = perf_libs::api() {
|
||||
unsafe {
|
||||
use core::ffi::c_void;
|
||||
use std::mem::size_of;
|
||||
|
||||
let err = cuda_host_register(
|
||||
_mem.as_mut_ptr() as *mut c_void,
|
||||
_mem.capacity() * size_of::<T>(),
|
||||
0,
|
||||
);
|
||||
if err != CUDA_SUCCESS {
|
||||
error!(
|
||||
"cudaHostRegister error: {} ptr: {:?} bytes: {}",
|
||||
err,
|
||||
_mem.as_ptr(),
|
||||
_mem.capacity() * size_of::<T>()
|
||||
);
|
||||
let err = (api.cuda_host_register)(
|
||||
_mem.as_mut_ptr() as *mut c_void,
|
||||
_mem.capacity() * size_of::<T>(),
|
||||
0,
|
||||
);
|
||||
if err != CUDA_SUCCESS {
|
||||
error!(
|
||||
"cudaHostRegister error: {} ptr: {:?} bytes: {}",
|
||||
err,
|
||||
_mem.as_ptr(),
|
||||
_mem.capacity() * size_of::<T>()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unpin<T>(_mem: *mut T) {
|
||||
#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))]
|
||||
unsafe {
|
||||
use core::ffi::c_void;
|
||||
#[cfg(feature = "pin_gpu_memory")]
|
||||
{
|
||||
if let Some(api) = perf_libs::api() {
|
||||
unsafe {
|
||||
use core::ffi::c_void;
|
||||
|
||||
let err = cuda_host_unregister(_mem as *mut c_void);
|
||||
if err != CUDA_SUCCESS {
|
||||
error!("cudaHostUnregister returned: {} ptr: {:?}", err, _mem);
|
||||
let err = (api.cuda_host_unregister)(_mem as *mut c_void);
|
||||
if err != CUDA_SUCCESS {
|
||||
error!("cudaHostUnregister returned: {} ptr: {:?}", err, _mem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
//! transactions within it. Entries cannot be reordered, and its field `num_hashes`
|
||||
//! represents an approximate amount of time since the last Entry was created.
|
||||
use crate::packet::{Blob, SharedBlob};
|
||||
use crate::perf_libs;
|
||||
use crate::poh::Poh;
|
||||
use crate::result::Result;
|
||||
use bincode::{deserialize, serialized_size};
|
||||
@ -10,20 +11,14 @@ use rayon::prelude::*;
|
||||
use rayon::ThreadPool;
|
||||
use solana_merkle_tree::MerkleTree;
|
||||
use solana_metrics::inc_new_counter_warn;
|
||||
use solana_rayon_threadlimit::get_thread_count;
|
||||
use solana_sdk::hash::Hash;
|
||||
use solana_sdk::timing;
|
||||
use solana_sdk::transaction::Transaction;
|
||||
use std::borrow::Borrow;
|
||||
use std::cell::RefCell;
|
||||
use std::sync::mpsc::{Receiver, Sender};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
use crate::sigverify::poh_verify_many;
|
||||
use solana_rayon_threadlimit::get_thread_count;
|
||||
#[cfg(feature = "cuda")]
|
||||
use std::sync::Mutex;
|
||||
#[cfg(feature = "cuda")]
|
||||
use std::sync::{Arc, Mutex, RwLock};
|
||||
use std::thread;
|
||||
use std::time::Instant;
|
||||
|
||||
@ -257,13 +252,12 @@ impl EntrySlice for [Entry] {
|
||||
res
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
fn verify(&self, start_hash: &Hash) -> bool {
|
||||
self.verify_cpu(start_hash)
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
fn verify(&self, start_hash: &Hash) -> bool {
|
||||
let api = perf_libs::api();
|
||||
if api.is_none() {
|
||||
return self.verify_cpu(start_hash);
|
||||
}
|
||||
let api = api.unwrap();
|
||||
inc_new_counter_warn!("entry_verify-num_entries", self.len() as usize);
|
||||
|
||||
// Use CPU verify if the batch length is < 1K
|
||||
@ -287,7 +281,7 @@ impl EntrySlice for [Entry] {
|
||||
.collect();
|
||||
|
||||
let num_hashes_vec: Vec<u64> = self
|
||||
.into_iter()
|
||||
.iter()
|
||||
.map(|entry| entry.num_hashes.saturating_sub(1))
|
||||
.collect();
|
||||
|
||||
@ -300,7 +294,7 @@ impl EntrySlice for [Entry] {
|
||||
let mut hashes = hashes_clone.lock().unwrap();
|
||||
let res;
|
||||
unsafe {
|
||||
res = poh_verify_many(
|
||||
res = (api.poh_verify_many)(
|
||||
hashes.as_mut_ptr() as *mut u8,
|
||||
num_hashes_vec.as_ptr(),
|
||||
length,
|
||||
|
@ -10,10 +10,10 @@ pub mod banking_stage;
|
||||
pub mod blob_fetch_stage;
|
||||
pub mod broadcast_stage;
|
||||
pub mod chacha;
|
||||
#[cfg(cuda)]
|
||||
pub mod chacha_cuda;
|
||||
pub mod cluster_info_vote_listener;
|
||||
pub mod confidence;
|
||||
pub mod perf_libs;
|
||||
pub mod recycler;
|
||||
#[macro_use]
|
||||
pub mod contact_info;
|
||||
@ -75,6 +75,9 @@ pub(crate) mod version;
|
||||
pub mod weighted_shuffle;
|
||||
pub mod window_service;
|
||||
|
||||
#[macro_use]
|
||||
extern crate dlopen_derive;
|
||||
|
||||
#[macro_use]
|
||||
extern crate solana_budget_program;
|
||||
|
||||
|
171
core/src/perf_libs.rs
Normal file
171
core/src/perf_libs.rs
Normal file
@ -0,0 +1,171 @@
|
||||
use crate::packet::Packet;
|
||||
use core::ffi::c_void;
|
||||
use dlopen::symbor::{Container, SymBorApi, Symbol};
|
||||
use std::env;
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::os::raw::{c_int, c_uint};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Once;
|
||||
|
||||
#[repr(C)]
|
||||
pub struct Elems {
|
||||
pub elems: *const Packet,
|
||||
pub num: u32,
|
||||
}
|
||||
|
||||
#[derive(SymBorApi)]
|
||||
pub struct Api<'a> {
|
||||
pub ed25519_init: Symbol<'a, unsafe extern "C" fn() -> bool>,
|
||||
pub ed25519_set_verbose: Symbol<'a, unsafe extern "C" fn(val: bool)>,
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub ed25519_verify_many: Symbol<
|
||||
'a,
|
||||
unsafe extern "C" fn(
|
||||
vecs: *const Elems,
|
||||
num: u32, //number of vecs
|
||||
message_size: u32, //size of each element inside the elems field of the vec
|
||||
total_packets: u32,
|
||||
total_signatures: u32,
|
||||
message_lens: *const u32,
|
||||
pubkey_offsets: *const u32,
|
||||
signature_offsets: *const u32,
|
||||
signed_message_offsets: *const u32,
|
||||
out: *mut u8, //combined length of all the items in vecs
|
||||
use_non_default_stream: u8,
|
||||
) -> u32,
|
||||
>,
|
||||
|
||||
pub chacha_cbc_encrypt_many_sample: Symbol<
|
||||
'a,
|
||||
unsafe extern "C" fn(
|
||||
input: *const u8,
|
||||
sha_state: *mut u8,
|
||||
in_len: usize,
|
||||
keys: *const u8,
|
||||
ivec: *mut u8,
|
||||
num_keys: u32,
|
||||
samples: *const u64,
|
||||
num_samples: u32,
|
||||
starting_block: u64,
|
||||
time_us: *mut f32,
|
||||
),
|
||||
>,
|
||||
|
||||
pub chacha_init_sha_state: Symbol<'a, unsafe extern "C" fn(sha_state: *mut u8, num_keys: u32)>,
|
||||
pub chacha_end_sha_state:
|
||||
Symbol<'a, unsafe extern "C" fn(sha_state_in: *const u8, out: *mut u8, num_keys: u32)>,
|
||||
|
||||
pub poh_verify_many: Symbol<
|
||||
'a,
|
||||
unsafe extern "C" fn(
|
||||
hashes: *mut u8,
|
||||
num_hashes_arr: *const u64,
|
||||
num_elems: usize,
|
||||
use_non_default_stream: u8,
|
||||
) -> c_int,
|
||||
>,
|
||||
|
||||
pub cuda_host_register:
|
||||
Symbol<'a, unsafe extern "C" fn(ptr: *mut c_void, size: usize, flags: c_uint) -> c_int>,
|
||||
|
||||
pub cuda_host_unregister: Symbol<'a, unsafe extern "C" fn(ptr: *mut c_void) -> c_int>,
|
||||
}
|
||||
|
||||
// Process-wide handle to the loaded perf-libs shared object. It starts out as
// `None`, is assigned by `init` inside a `Once` guard, and is read by `api()`.
static mut API: Option<Container<Api>> = None;
|
||||
|
||||
fn init(name: &OsStr) {
|
||||
static INIT_HOOK: Once = Once::new();
|
||||
|
||||
info!("Loading {:?}", name);
|
||||
unsafe {
|
||||
INIT_HOOK.call_once(|| {
|
||||
API = Some(Container::load(name).unwrap_or_else(|err| {
|
||||
error!("Unable to load {:?}: {}", name, err);
|
||||
std::process::exit(1);
|
||||
}));
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn locate_perf_libs() -> Option<PathBuf> {
|
||||
let exe = env::current_exe().expect("Unable to get executable path");
|
||||
let perf_libs = exe.parent().unwrap().join("perf-libs");
|
||||
if perf_libs.is_dir() {
|
||||
info!("perf-libs found at {:?}", perf_libs);
|
||||
return Some(perf_libs);
|
||||
}
|
||||
warn!("{:?} does not exist", perf_libs);
|
||||
None
|
||||
}
|
||||
|
||||
/// Finds a locally installed CUDA toolkit that matches one of the bundled
/// perf-libs builds.
///
/// Each `cuda-*` subdirectory of `perf_libs_path` is paired with a directory
/// of the same name under `/usr/local`; the first `/usr/local/cuda-*`
/// directory that exists is returned. Entries that are not directories, are
/// not valid UTF-8, or do not start with `cuda-` are ignored.
///
/// Returns `None` when there is no match. It also returns `None`, instead of
/// panicking, when `perf_libs_path` cannot be listed (missing, not a
/// directory, or unreadable), so the caller's fallback path still runs.
fn find_cuda_home(perf_libs_path: &Path) -> Option<PathBuf> {
    // A directory that vanished or is unreadable is treated as "no CUDA found".
    let entries = fs::read_dir(perf_libs_path).ok()?;

    entries
        // Individual unreadable entries are skipped, as before.
        .filter_map(Result::ok)
        .map(|entry| entry.path())
        .filter(|path| path.is_dir())
        .find_map(|path| {
            let dir_name = path.file_name()?.to_str()?;
            if !dir_name.starts_with("cuda-") {
                return None;
            }

            let cuda_home = Path::new("/usr/local").join(dir_name);
            if cuda_home.is_dir() {
                Some(cuda_home)
            } else {
                None
            }
        })
}
|
||||
|
||||
pub fn init_cuda() {
|
||||
if let Some(perf_libs_path) = locate_perf_libs() {
|
||||
if let Some(cuda_home) = find_cuda_home(&perf_libs_path) {
|
||||
info!("CUDA installation found at {:?}", cuda_home);
|
||||
|
||||
let cuda_lib64_dir = cuda_home.join("lib64");
|
||||
if cuda_lib64_dir.is_dir() {
|
||||
let ld_library_path = cuda_lib64_dir.to_str().unwrap_or("").to_string()
|
||||
+ ":"
|
||||
+ &env::var("LD_LIBRARY_PATH").unwrap_or_else(|_| "".to_string());
|
||||
info!("LD_LIBRARY_PATH set to {:?}", ld_library_path);
|
||||
|
||||
// Prefix LD_LIBRARY_PATH with $CUDA_HOME/lib64 directory
|
||||
// to ensure the correct CUDA version is used
|
||||
env::set_var("LD_LIBRARY_PATH", ld_library_path)
|
||||
} else {
|
||||
warn!("{:?} does not exist", cuda_lib64_dir);
|
||||
}
|
||||
|
||||
let libcuda_crypt = perf_libs_path
|
||||
.join(cuda_home.file_name().unwrap())
|
||||
.join("libcuda-crypt.so");
|
||||
return init(libcuda_crypt.as_os_str());
|
||||
} else {
|
||||
warn!("CUDA installation not found");
|
||||
}
|
||||
}
|
||||
|
||||
// Last resort! Blindly load the shared object and hope it all works out
|
||||
init(OsStr::new("libcuda-crypt.so"))
|
||||
}
|
||||
|
||||
pub fn api() -> Option<&'static Container<Api<'static>>> {
|
||||
#[cfg(test)]
|
||||
{
|
||||
static INIT_HOOK: Once = Once::new();
|
||||
INIT_HOOK.call_once(|| {
|
||||
if std::env::var("TEST_PERF_LIBS_CUDA").is_ok() {
|
||||
init_cuda();
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
unsafe { API.as_ref() }
|
||||
}
|
@ -1,11 +1,12 @@
|
||||
//! The `sigverify` module provides digital signature verification functions.
|
||||
//! By default, signatures are verified in parallel using all available CPU
|
||||
//! cores. When `--features=cuda` is enabled, signature verification is
|
||||
//! offloaded to the GPU.
|
||||
//! cores. When perf-libs are available, signature verification is offloaded
|
||||
//! to the GPU.
|
||||
//!
|
||||
|
||||
use crate::cuda_runtime::PinnedVec;
|
||||
use crate::packet::{Packet, Packets};
|
||||
use crate::perf_libs;
|
||||
use crate::recycler::Recycler;
|
||||
use crate::result::Result;
|
||||
use bincode::serialized_size;
|
||||
@ -19,11 +20,7 @@ use solana_sdk::signature::Signature;
|
||||
use solana_sdk::transaction::Transaction;
|
||||
use std::mem::size_of;
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
use core::ffi::c_void;
|
||||
use solana_rayon_threadlimit::get_thread_count;
|
||||
#[cfg(feature = "cuda")]
|
||||
use std::os::raw::{c_int, c_uint};
|
||||
pub const NUM_THREADS: u32 = 10;
|
||||
use std::cell::RefCell;
|
||||
|
||||
@ -36,62 +33,16 @@ pub type TxOffset = PinnedVec<u32>;
|
||||
|
||||
type TxOffsets = (TxOffset, TxOffset, TxOffset, TxOffset, Vec<Vec<u32>>);
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[repr(C)]
|
||||
struct Elems {
|
||||
elems: *const Packet,
|
||||
num: u32,
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[link(name = "cuda-crypt")]
|
||||
extern "C" {
|
||||
fn ed25519_init() -> bool;
|
||||
fn ed25519_set_verbose(val: bool);
|
||||
fn ed25519_verify_many(
|
||||
vecs: *const Elems,
|
||||
num: u32, //number of vecs
|
||||
message_size: u32, //size of each element inside the elems field of the vec
|
||||
total_packets: u32,
|
||||
total_signatures: u32,
|
||||
message_lens: *const u32,
|
||||
pubkey_offsets: *const u32,
|
||||
signature_offsets: *const u32,
|
||||
signed_message_offsets: *const u32,
|
||||
out: *mut u8, //combined length of all the items in vecs
|
||||
use_non_default_stream: u8,
|
||||
) -> u32;
|
||||
|
||||
pub fn chacha_cbc_encrypt_many_sample(
|
||||
input: *const u8,
|
||||
sha_state: *mut u8,
|
||||
in_len: usize,
|
||||
keys: *const u8,
|
||||
ivec: *mut u8,
|
||||
num_keys: u32,
|
||||
samples: *const u64,
|
||||
num_samples: u32,
|
||||
starting_block: u64,
|
||||
time_us: *mut f32,
|
||||
);
|
||||
|
||||
pub fn chacha_init_sha_state(sha_state: *mut u8, num_keys: u32);
|
||||
pub fn chacha_end_sha_state(sha_state_in: *const u8, out: *mut u8, num_keys: u32);
|
||||
|
||||
pub fn poh_verify_many(
|
||||
hashes: *mut u8,
|
||||
num_hashes_arr: *const u64,
|
||||
num_elems: usize,
|
||||
use_non_default_stream: u8,
|
||||
) -> c_int;
|
||||
|
||||
pub fn cuda_host_register(ptr: *mut c_void, size: usize, flags: c_uint) -> c_int;
|
||||
pub fn cuda_host_unregister(ptr: *mut c_void) -> c_int;
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
pub fn init() {
|
||||
// stub
|
||||
if let Some(api) = perf_libs::api() {
|
||||
unsafe {
|
||||
(api.ed25519_set_verbose)(true);
|
||||
if !(api.ed25519_init)() {
|
||||
panic!("ed25519_init() failed");
|
||||
}
|
||||
(api.ed25519_set_verbose)(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn verify_packet(packet: &Packet) -> u8 {
|
||||
@ -130,15 +81,6 @@ fn batch_size(batches: &[Packets]) -> usize {
|
||||
batches.iter().map(|p| p.packets.len()).sum()
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
pub fn ed25519_verify(
|
||||
batches: &[Packets],
|
||||
_recycler: &Recycler<TxOffset>,
|
||||
_recycler_out: &Recycler<PinnedVec<u8>>,
|
||||
) -> Vec<Vec<u8>> {
|
||||
ed25519_verify_cpu(batches)
|
||||
}
|
||||
|
||||
pub fn get_packet_offsets(packet: &Packet, current_offset: u32) -> (u32, u32, u32, u32) {
|
||||
let (sig_len, sig_size) = decode_len(&packet.data);
|
||||
let msg_start_offset = sig_size + sig_len * size_of::<Signature>();
|
||||
@ -235,23 +177,17 @@ pub fn ed25519_verify_disabled(batches: &[Packets]) -> Vec<Vec<u8>> {
|
||||
rv
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
pub fn init() {
|
||||
unsafe {
|
||||
ed25519_set_verbose(true);
|
||||
if !ed25519_init() {
|
||||
panic!("ed25519_init() failed");
|
||||
}
|
||||
ed25519_set_verbose(false);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
pub fn ed25519_verify(
|
||||
batches: &[Packets],
|
||||
recycler: &Recycler<TxOffset>,
|
||||
recycler_out: &Recycler<PinnedVec<u8>>,
|
||||
) -> Vec<Vec<u8>> {
|
||||
let api = perf_libs::api();
|
||||
if api.is_none() {
|
||||
return ed25519_verify_cpu(batches);
|
||||
}
|
||||
let api = api.unwrap();
|
||||
|
||||
use crate::packet::PACKET_DATA_SIZE;
|
||||
let count = batch_size(batches);
|
||||
|
||||
@ -276,7 +212,7 @@ pub fn ed25519_verify(
|
||||
|
||||
let mut num_packets = 0;
|
||||
for p in batches {
|
||||
elems.push(Elems {
|
||||
elems.push(perf_libs::Elems {
|
||||
elems: p.packets.as_ptr(),
|
||||
num: p.packets.len() as u32,
|
||||
});
|
||||
@ -292,7 +228,7 @@ pub fn ed25519_verify(
|
||||
trace!("len offset: {}", PACKET_DATA_SIZE as u32);
|
||||
const USE_NON_DEFAULT_STREAM: u8 = 1;
|
||||
unsafe {
|
||||
let res = ed25519_verify_many(
|
||||
let res = (api.ed25519_verify_many)(
|
||||
elems.as_ptr(),
|
||||
elems.len() as u32,
|
||||
size_of::<Packet>() as u32,
|
||||
|
@ -3,10 +3,11 @@
|
||||
//! top-level list with a list of booleans, telling the next stage whether the
|
||||
//! signature in that packet is valid. It assumes each packet contains one
|
||||
//! transaction. All processing is done on the CPU by default and on a GPU
|
||||
//! if the `cuda` feature is enabled with `--features=cuda`.
|
||||
//! if perf-libs are available.
|
||||
|
||||
use crate::cuda_runtime::PinnedVec;
|
||||
use crate::packet::Packets;
|
||||
use crate::perf_libs;
|
||||
use crate::recycler::Recycler;
|
||||
use crate::result::{Error, Result};
|
||||
use crate::service::Service;
|
||||
@ -21,11 +22,8 @@ use std::sync::mpsc::{Receiver, RecvTimeoutError};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread::{self, Builder, JoinHandle};
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
const RECV_BATCH_MAX: usize = 5_000;
|
||||
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
const RECV_BATCH_MAX: usize = 1000;
|
||||
const RECV_BATCH_MAX_CPU: usize = 1_000;
|
||||
const RECV_BATCH_MAX_GPU: usize = 5_000;
|
||||
|
||||
pub type VerifiedPackets = Vec<(Packets, Vec<u8>)>;
|
||||
|
||||
@ -70,7 +68,11 @@ impl SigVerifyStage {
|
||||
) -> Result<()> {
|
||||
let (batch, len, recv_time) = streamer::recv_batch(
|
||||
&recvr.lock().expect("'recvr' lock in fn verifier"),
|
||||
RECV_BATCH_MAX,
|
||||
if perf_libs::api().is_some() {
|
||||
RECV_BATCH_MAX_GPU
|
||||
} else {
|
||||
RECV_BATCH_MAX_CPU
|
||||
},
|
||||
)?;
|
||||
inc_new_counter_info!("sigverify_stage-packets_received", len);
|
||||
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
use crate::bank_forks::BankForks;
|
||||
use crate::blocktree::Blocktree;
|
||||
#[cfg(cuda)]
|
||||
use crate::chacha_cuda::chacha_cbc_encrypt_file_many_keys;
|
||||
use crate::cluster_info::ClusterInfo;
|
||||
use crate::result::{Error, Result};
|
||||
@ -408,11 +407,11 @@ impl StorageStage {
|
||||
samples.push(rng.gen_range(0, 10));
|
||||
}
|
||||
debug!("generated samples: {:?}", samples);
|
||||
|
||||
// TODO: cuda required to generate the reference values
|
||||
// but if it is missing, then we need to take care not to
|
||||
// process storage mining results.
|
||||
#[cfg(cuda)]
|
||||
{
|
||||
if crate::perf_libs::api().is_some() {
|
||||
// Lock the keys, since this is the IV memory,
|
||||
// it will be updated in-place by the encryption.
|
||||
// Should be overwritten by the proof signatures which replace the
|
||||
@ -729,10 +728,8 @@ mod tests {
|
||||
let keypair = Keypair::new();
|
||||
let hash = Hash::default();
|
||||
let signature = keypair.sign_message(&hash.as_ref());
|
||||
#[cfg(feature = "cuda")]
|
||||
|
||||
let mut result = storage_state.get_mining_result(&signature);
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
let result = storage_state.get_mining_result(&signature);
|
||||
|
||||
assert_eq!(result, Hash::default());
|
||||
|
||||
@ -752,26 +749,27 @@ mod tests {
|
||||
.collect::<Vec<_>>();
|
||||
bank_sender.send(rooted_banks).unwrap();
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
for _ in 0..5 {
|
||||
result = storage_state.get_mining_result(&signature);
|
||||
if result != Hash::default() {
|
||||
info!("found result = {:?} sleeping..", result);
|
||||
break;
|
||||
if crate::perf_libs::api().is_some() {
|
||||
for _ in 0..5 {
|
||||
result = storage_state.get_mining_result(&signature);
|
||||
if result != Hash::default() {
|
||||
info!("found result = {:?} sleeping..", result);
|
||||
break;
|
||||
}
|
||||
info!("result = {:?} sleeping..", result);
|
||||
sleep(Duration::new(1, 0));
|
||||
}
|
||||
info!("result = {:?} sleeping..", result);
|
||||
sleep(Duration::new(1, 0));
|
||||
}
|
||||
|
||||
info!("joining..?");
|
||||
exit.store(true, Ordering::Relaxed);
|
||||
storage_stage.join().unwrap();
|
||||
|
||||
#[cfg(not(cuda))]
|
||||
assert_eq!(result, Hash::default());
|
||||
|
||||
#[cfg(cuda)]
|
||||
assert_ne!(result, Hash::default());
|
||||
if crate::perf_libs::api().is_some() {
|
||||
assert_ne!(result, Hash::default());
|
||||
} else {
|
||||
assert_eq!(result, Hash::default());
|
||||
}
|
||||
|
||||
remove_dir_all(ledger_path).unwrap();
|
||||
}
|
||||
|
@ -118,7 +118,14 @@ impl Validator {
|
||||
|
||||
warn!("identity pubkey: {:?}", id);
|
||||
warn!("vote pubkey: {:?}", vote_account);
|
||||
warn!("CUDA is {}abled", if cfg!(cuda) { "en" } else { "dis" });
|
||||
warn!(
|
||||
"CUDA is {}abled",
|
||||
if crate::perf_libs::api().is_some() {
|
||||
"en"
|
||||
} else {
|
||||
"dis"
|
||||
}
|
||||
);
|
||||
info!("entrypoint: {:?}", entrypoint_info_option);
|
||||
|
||||
Self::print_node_info(&node);
|
||||
|
Reference in New Issue
Block a user