diff --git a/core/src/banking_stage.rs b/core/src/banking_stage.rs
index 0c62b2e7c0..727aff6337 100644
--- a/core/src/banking_stage.rs
+++ b/core/src/banking_stage.rs
@@ -8,6 +8,7 @@ use crate::{
     leader_schedule_cache::LeaderScheduleCache,
     packet::PACKETS_PER_BATCH,
     packet::{Packet, Packets},
+    perf_libs,
     poh_recorder::{PohRecorder, PohRecorderError, WorkingBankEntry},
     poh_service::PohService,
     result::{Error, Result},
@@ -20,6 +21,7 @@ use itertools::Itertools;
 use solana_measure::measure::Measure;
 use solana_metrics::{inc_new_counter_debug, inc_new_counter_info, inc_new_counter_warn};
 use solana_runtime::{accounts_db::ErrorCounters, bank::Bank, transaction_batch::TransactionBatch};
+use solana_sdk::clock::MAX_TRANSACTION_FORWARDING_DELAY_GPU;
 use solana_sdk::{
     clock::{
         DEFAULT_TICKS_PER_SECOND, DEFAULT_TICKS_PER_SLOT, MAX_PROCESSING_AGE,
@@ -697,12 +699,18 @@ impl BankingStage {
         //  1. Transaction forwarding delay
         //  2. The slot at which the next leader will actually process the transaction
         // Drop the transaction if it will expire by the time the next node receives and processes it
+        let api = perf_libs::api();
+        let max_tx_fwd_delay = if api.is_none() {
+            MAX_TRANSACTION_FORWARDING_DELAY // perf_libs API unavailable: use the non-GPU delay
+        } else {
+            MAX_TRANSACTION_FORWARDING_DELAY_GPU // perf_libs API available: use the GPU delay
+        };
         let result = bank.check_transactions(
             transactions,
             None,
             &filter,
             (MAX_PROCESSING_AGE)
-                .saturating_sub(MAX_TRANSACTION_FORWARDING_DELAY)
+                .saturating_sub(max_tx_fwd_delay)
                 .saturating_sub(
                     (FORWARD_TRANSACTIONS_TO_LEADER_AT_SLOT_OFFSET * bank.ticks_per_slot()
                         / DEFAULT_TICKS_PER_SECOND) as usize,
diff --git a/core/src/cuda_runtime.rs b/core/src/cuda_runtime.rs
index 252e81b3ec..27c2e38bbc 100644
--- a/core/src/cuda_runtime.rs
+++ b/core/src/cuda_runtime.rs
@@ -5,7 +5,6 @@
 //    copies from host memory to GPU memory unless the memory is page-pinned and
 //    cannot be paged to disk. The cuda driver provides these interfaces to pin and unpin memory.
 
-#[cfg(feature = "pin_gpu_memory")]
 use crate::perf_libs;
 use crate::recycler::Reset;
 use std::ops::{Deref, DerefMut};
@@ -195,12 +194,10 @@ impl<T: Clone> PinnedVec<T> {
         self.x.len()
     }
 
-    #[cfg(feature = "cuda")]
     pub fn as_ptr(&self) -> *const T {
         self.x.as_ptr()
     }
 
-    #[cfg(feature = "cuda")]
     pub fn as_mut_ptr(&mut self) -> *mut T {
         self.x.as_mut_ptr()
     }
@@ -230,23 +227,23 @@ impl<T: Clone> PinnedVec<T> {
     }
 
     fn check_ptr(&mut self, _old_ptr: *mut T, _old_capacity: usize, _from: &'static str) {
-        #[cfg(feature = "cuda")]
+        let api = perf_libs::api(); // checked at runtime, not via a cargo feature
+        if api.is_some()
+            && self.pinnable
+            && (self.x.as_ptr() != _old_ptr || self.x.capacity() != _old_capacity)
         {
-            if self.pinnable && (self.x.as_ptr() != _old_ptr || self.x.capacity() != _old_capacity)
-            {
-                if self.pinned {
-                    unpin(_old_ptr);
-                }
-
-                trace!(
-                    "pinning from check_ptr old: {} size: {} from: {}",
-                    _old_capacity,
-                    self.x.capacity(),
-                    _from
-                );
-                pin(&mut self.x);
-                self.pinned = true;
+            if self.pinned {
+                unpin(_old_ptr); // release the pin registered for the old pointer
             }
+
+            trace!(
+                "pinning from check_ptr old: {} size: {} from: {}",
+                _old_capacity,
+                self.x.capacity(),
+                _from
+            );
+            pin(&mut self.x); // pointer or capacity changed, so pin the vector again
+            self.pinned = true;
         }
     }
 }
diff --git a/sdk/src/clock.rs b/sdk/src/clock.rs
index 28145785b7..c9f0761f17 100644
--- a/sdk/src/clock.rs
+++ b/sdk/src/clock.rs
@@ -36,11 +36,9 @@ pub const MAX_PROCESSING_AGE: usize = MAX_RECENT_BLOCKHASHES / 2;
 
 /// This is maximum time consumed in forwarding a transaction from one node to next, before
 /// it can be processed in the target node
-#[cfg(feature = "cuda")]
-pub const MAX_TRANSACTION_FORWARDING_DELAY: usize = 2;
+pub const MAX_TRANSACTION_FORWARDING_DELAY_GPU: usize = 2; // used when perf_libs::api() is Some
 
 /// More delay is expected if CUDA is not enabled (as signature verification takes longer)
-#[cfg(not(feature = "cuda"))]
 pub const MAX_TRANSACTION_FORWARDING_DELAY: usize = 6;
 
 /// Converts a slot to a storage segment. Does not indicate that a segment is complete.